From 66c3a6d1834176d6231068ee00c6b4bec01c9fd8 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Tue, 14 Jan 2025 13:35:29 +0100 Subject: [PATCH 1/8] AB#340942 Added the source code of Sparrow storage engine --- .gitignore | 4 + client/mysql_config_editor.cc | 8 +- include/my_sys.h | 1 + include/mysql.h.pp | 2 + include/mysql_com.h | 2 + libmysql/CMakeLists.txt | 2 + mysys/my_winfile.cc | 7 + sql-common/get_password.cc | 38 + sql/field.cc | 17 + sql/handler.h | 15 + sql/item_create.cc | 17 + sql/item_strfunc.h | 70 + sql/item_timefunc.h | 78 + sql/lex.h | 3 + sql/mysqld.cc | 6 + sql/sql_yacc.yy | 28 + storage/sparrow/CMakeLists.txt | 199 ++ storage/sparrow/api/api_assert.h | 83 + storage/sparrow/api/atomic.h | 190 ++ storage/sparrow/api/bufferlist.cc | 272 +++ storage/sparrow/api/bufferlist.h | 101 + storage/sparrow/api/compress.cc | 98 + storage/sparrow/api/compress.h | 26 + storage/sparrow/api/cond.h | 151 ++ storage/sparrow/api/hash.h | 743 ++++++ storage/sparrow/api/include/connection.h | 163 ++ storage/sparrow/api/include/exception.h | 135 + storage/sparrow/api/include/exceptwrapper.h | 331 +++ storage/sparrow/api/include/global.h | 43 + storage/sparrow/api/include/master.h | 60 + storage/sparrow/api/include/old/spw_global.h | 82 + storage/sparrow/api/include/sparrowbuffer.h | 76 + storage/sparrow/api/include/table.h | 59 + storage/sparrow/api/include/types.h | 191 ++ storage/sparrow/api/interval.h | 332 +++ storage/sparrow/api/intervaltree.h | 599 +++++ storage/sparrow/api/ipaddress.cc | 371 +++ storage/sparrow/api/ipaddress.h | 74 + storage/sparrow/api/list.h | 1005 ++++++++ storage/sparrow/api/lock.h | 313 +++ storage/sparrow/api/memalloc.cc | 84 + storage/sparrow/api/memalloc.h | 23 + storage/sparrow/api/misc.h | 303 +++ storage/sparrow/api/sema.h | 60 + storage/sparrow/api/serial.cc | 164 ++ storage/sparrow/api/serial.h | 742 ++++++ storage/sparrow/api/socketutil.cc | 247 ++ storage/sparrow/api/socketutil.h | 138 ++ storage/sparrow/api/spw_connection.cc | 1070 ++++++++ storage/sparrow/api/spw_connection.h | 274 +++ storage/sparrow/api/spw_master.cc | 71 + storage/sparrow/api/spw_master.h | 184 ++ storage/sparrow/api/spw_sparrowbuffer.cc | 234 ++ storage/sparrow/api/spw_sparrowbuffer.h | 60 + storage/sparrow/api/spw_table.cc | 209 ++ storage/sparrow/api/spw_table.h | 185 ++ storage/sparrow/api/spw_types.cc | 24 + storage/sparrow/api/spw_types.h | 834 +++++++ storage/sparrow/api/str.h | 309 +++ storage/sparrow/api/thread.cc | 14 + storage/sparrow/api/thread.h | 117 + storage/sparrow/api/vec.h | 1411 +++++++++++ storage/sparrow/api_test/CMakeLists.txt | 35 + storage/sparrow/api_test/all_types.cpp | 408 ++++ storage/sparrow/api_test/all_types.h | 244 ++ storage/sparrow/api_test/column_optim.cpp | 512 ++++ storage/sparrow/api_test/column_optim.h | 35 + storage/sparrow/api_test/column_subset.cpp | 430 ++++ storage/sparrow/api_test/column_subset.h | 83 + storage/sparrow/api_test/common.cpp | 76 + storage/sparrow/api_test/common.h | 28 + storage/sparrow/api_test/errors.cpp | 143 ++ storage/sparrow/api_test/errors.h | 21 + storage/sparrow/api_test/exception.cc | 106 + storage/sparrow/api_test/exception.h | 56 + storage/sparrow/api_test/many_partitions.cpp | 154 ++ storage/sparrow/api_test/many_partitions.h | 22 + storage/sparrow/api_test/sparrow_api_test.cpp | 119 + storage/sparrow/api_test/sql.cpp | 72 + storage/sparrow/api_test/sql.h | 41 + storage/sparrow/api_test/too_many_columns.cpp | 187 ++ storage/sparrow/api_test/too_many_columns.h | 23 + storage/sparrow/api_test/utils.h | 58 
+ storage/sparrow/api_test/vl.cpp | 219 ++ storage/sparrow/api_test/vl.h | 22 + storage/sparrow/dns/dns.cc | 273 +++ storage/sparrow/dns/dns.h | 276 +++ storage/sparrow/dns/dnscache.cc | 194 ++ storage/sparrow/dns/dnscache.h | 237 ++ storage/sparrow/dns/dnsconfiguration.cc | 265 ++ storage/sparrow/dns/dnsconfiguration.h | 279 +++ storage/sparrow/dns/dnsdefault.cc | 80 + storage/sparrow/dns/dnsdefault.h | 37 + storage/sparrow/dns/dnsnet.cc | 386 +++ storage/sparrow/dns/dnsnet.h | 41 + storage/sparrow/dns/dnsserver.cc | 193 ++ storage/sparrow/dns/dnsserver.h | 229 ++ storage/sparrow/engine/alter.cc | 210 ++ storage/sparrow/engine/alter.h | 175 ++ storage/sparrow/engine/atomic.h | 190 ++ storage/sparrow/engine/binbuffer.cc | 96 + storage/sparrow/engine/binbuffer.h | 238 ++ storage/sparrow/engine/cache.cc | 537 ++++ storage/sparrow/engine/cache.h | 1243 ++++++++++ storage/sparrow/engine/coalescing.cc | 503 ++++ storage/sparrow/engine/coalescing.h | 343 +++ storage/sparrow/engine/compress.cc | 98 + storage/sparrow/engine/compress.h | 29 + storage/sparrow/engine/cond.h | 170 ++ storage/sparrow/engine/condition.cc | 423 ++++ storage/sparrow/engine/condition.h | 53 + storage/sparrow/engine/context.cc | 931 +++++++ storage/sparrow/engine/context.h | 430 ++++ storage/sparrow/engine/exception.h | 66 + storage/sparrow/engine/fileutil.cc | 963 ++++++++ storage/sparrow/engine/fileutil.h | 1188 +++++++++ storage/sparrow/engine/flush.cc | 96 + storage/sparrow/engine/flush.h | 212 ++ storage/sparrow/engine/hash.h | 768 ++++++ storage/sparrow/engine/internalapi.cc | 1091 +++++++++ storage/sparrow/engine/internalapi.h | 153 ++ storage/sparrow/engine/interval.h | 332 +++ storage/sparrow/engine/intervaltree.h | 601 +++++ storage/sparrow/engine/io.cc | 699 ++++++ storage/sparrow/engine/io.h | 326 +++ storage/sparrow/engine/list.h | 1003 ++++++++ storage/sparrow/engine/listener.cc | 718 ++++++ storage/sparrow/engine/listener.h | 348 +++ storage/sparrow/engine/lock.h | 399 +++ storage/sparrow/engine/log.h | 45 + storage/sparrow/engine/master.cc | 2163 +++++++++++++++++ storage/sparrow/engine/master.h | 868 +++++++ storage/sparrow/engine/misc.h | 625 +++++ storage/sparrow/engine/partition.h | 447 ++++ storage/sparrow/engine/persistent.cc | 1125 +++++++++ storage/sparrow/engine/persistent.h | 369 +++ storage/sparrow/engine/purge.cc | 293 +++ storage/sparrow/engine/purge.h | 98 + storage/sparrow/engine/queue.h | 158 ++ storage/sparrow/engine/scheduler.cc | 149 ++ storage/sparrow/engine/scheduler.h | 216 ++ storage/sparrow/engine/search.h | 140 ++ storage/sparrow/engine/sema.h | 58 + storage/sparrow/engine/serial.cc | 240 ++ storage/sparrow/engine/serial.h | 724 ++++++ storage/sparrow/engine/socketutil.cc | 203 ++ storage/sparrow/engine/socketutil.h | 131 + storage/sparrow/engine/sort.cc | 48 + storage/sparrow/engine/sort.h | 220 ++ storage/sparrow/engine/thread.cc | 172 ++ storage/sparrow/engine/thread.h | 555 +++++ storage/sparrow/engine/transient.cc | 1800 ++++++++++++++ storage/sparrow/engine/transient.h | 1150 +++++++++ storage/sparrow/engine/treeorder.cc | 94 + storage/sparrow/engine/treeorder.h | 114 + storage/sparrow/engine/types.cc | 222 ++ storage/sparrow/engine/types.h | 710 ++++++ storage/sparrow/engine/vec.h | 1481 +++++++++++ storage/sparrow/functions/functions.cc | 560 +++++ storage/sparrow/functions/functions.h | 14 + storage/sparrow/functions/ipaddress.cc | 372 +++ storage/sparrow/functions/ipaddress.h | 74 + storage/sparrow/handler/field.cc | 749 ++++++ storage/sparrow/handler/field.h | 901 +++++++ 
storage/sparrow/handler/hasparrow.cc | 2156 ++++++++++++++++ storage/sparrow/handler/hasparrow.h | 671 +++++ storage/sparrow/handler/plugin.cc | 691 ++++++ storage/sparrow/handler/plugin.h | 214 ++ storage/sparrow/sparrow.ini | 300 +++ storage/sparrow/udf/CMakeLists.txt | 18 + storage/sparrow/udf/operator.cc | 1 + storage/sparrow/udf/operator.h | 419 ++++ storage/sparrow/udf/udf.cc | 594 +++++ storage/sparrow/udf/udf.def | 14 + storage/sparrow/udf/udf.h | 48 + storage/sparrow/udf/udfargument.cc | 81 + storage/sparrow/udf/udfargument.h | 423 ++++ 177 files changed, 55885 insertions(+), 1 deletion(-) create mode 100644 storage/sparrow/CMakeLists.txt create mode 100644 storage/sparrow/api/api_assert.h create mode 100644 storage/sparrow/api/atomic.h create mode 100644 storage/sparrow/api/bufferlist.cc create mode 100644 storage/sparrow/api/bufferlist.h create mode 100644 storage/sparrow/api/compress.cc create mode 100644 storage/sparrow/api/compress.h create mode 100644 storage/sparrow/api/cond.h create mode 100644 storage/sparrow/api/hash.h create mode 100644 storage/sparrow/api/include/connection.h create mode 100644 storage/sparrow/api/include/exception.h create mode 100644 storage/sparrow/api/include/exceptwrapper.h create mode 100644 storage/sparrow/api/include/global.h create mode 100644 storage/sparrow/api/include/master.h create mode 100644 storage/sparrow/api/include/old/spw_global.h create mode 100644 storage/sparrow/api/include/sparrowbuffer.h create mode 100644 storage/sparrow/api/include/table.h create mode 100644 storage/sparrow/api/include/types.h create mode 100644 storage/sparrow/api/interval.h create mode 100644 storage/sparrow/api/intervaltree.h create mode 100644 storage/sparrow/api/ipaddress.cc create mode 100644 storage/sparrow/api/ipaddress.h create mode 100644 storage/sparrow/api/list.h create mode 100644 storage/sparrow/api/lock.h create mode 100644 storage/sparrow/api/memalloc.cc create mode 100644 storage/sparrow/api/memalloc.h create mode 100644 storage/sparrow/api/misc.h create mode 100644 storage/sparrow/api/sema.h create mode 100644 storage/sparrow/api/serial.cc create mode 100644 storage/sparrow/api/serial.h create mode 100644 storage/sparrow/api/socketutil.cc create mode 100644 storage/sparrow/api/socketutil.h create mode 100644 storage/sparrow/api/spw_connection.cc create mode 100644 storage/sparrow/api/spw_connection.h create mode 100644 storage/sparrow/api/spw_master.cc create mode 100644 storage/sparrow/api/spw_master.h create mode 100644 storage/sparrow/api/spw_sparrowbuffer.cc create mode 100644 storage/sparrow/api/spw_sparrowbuffer.h create mode 100644 storage/sparrow/api/spw_table.cc create mode 100644 storage/sparrow/api/spw_table.h create mode 100644 storage/sparrow/api/spw_types.cc create mode 100644 storage/sparrow/api/spw_types.h create mode 100644 storage/sparrow/api/str.h create mode 100644 storage/sparrow/api/thread.cc create mode 100644 storage/sparrow/api/thread.h create mode 100644 storage/sparrow/api/vec.h create mode 100644 storage/sparrow/api_test/CMakeLists.txt create mode 100644 storage/sparrow/api_test/all_types.cpp create mode 100644 storage/sparrow/api_test/all_types.h create mode 100644 storage/sparrow/api_test/column_optim.cpp create mode 100644 storage/sparrow/api_test/column_optim.h create mode 100644 storage/sparrow/api_test/column_subset.cpp create mode 100644 storage/sparrow/api_test/column_subset.h create mode 100644 storage/sparrow/api_test/common.cpp create mode 100644 storage/sparrow/api_test/common.h create mode 100644 
storage/sparrow/api_test/errors.cpp create mode 100644 storage/sparrow/api_test/errors.h create mode 100644 storage/sparrow/api_test/exception.cc create mode 100644 storage/sparrow/api_test/exception.h create mode 100644 storage/sparrow/api_test/many_partitions.cpp create mode 100644 storage/sparrow/api_test/many_partitions.h create mode 100644 storage/sparrow/api_test/sparrow_api_test.cpp create mode 100644 storage/sparrow/api_test/sql.cpp create mode 100644 storage/sparrow/api_test/sql.h create mode 100644 storage/sparrow/api_test/too_many_columns.cpp create mode 100644 storage/sparrow/api_test/too_many_columns.h create mode 100644 storage/sparrow/api_test/utils.h create mode 100644 storage/sparrow/api_test/vl.cpp create mode 100644 storage/sparrow/api_test/vl.h create mode 100644 storage/sparrow/dns/dns.cc create mode 100644 storage/sparrow/dns/dns.h create mode 100644 storage/sparrow/dns/dnscache.cc create mode 100644 storage/sparrow/dns/dnscache.h create mode 100644 storage/sparrow/dns/dnsconfiguration.cc create mode 100644 storage/sparrow/dns/dnsconfiguration.h create mode 100644 storage/sparrow/dns/dnsdefault.cc create mode 100644 storage/sparrow/dns/dnsdefault.h create mode 100644 storage/sparrow/dns/dnsnet.cc create mode 100644 storage/sparrow/dns/dnsnet.h create mode 100644 storage/sparrow/dns/dnsserver.cc create mode 100644 storage/sparrow/dns/dnsserver.h create mode 100644 storage/sparrow/engine/alter.cc create mode 100644 storage/sparrow/engine/alter.h create mode 100644 storage/sparrow/engine/atomic.h create mode 100644 storage/sparrow/engine/binbuffer.cc create mode 100644 storage/sparrow/engine/binbuffer.h create mode 100644 storage/sparrow/engine/cache.cc create mode 100644 storage/sparrow/engine/cache.h create mode 100644 storage/sparrow/engine/coalescing.cc create mode 100644 storage/sparrow/engine/coalescing.h create mode 100644 storage/sparrow/engine/compress.cc create mode 100644 storage/sparrow/engine/compress.h create mode 100644 storage/sparrow/engine/cond.h create mode 100644 storage/sparrow/engine/condition.cc create mode 100644 storage/sparrow/engine/condition.h create mode 100644 storage/sparrow/engine/context.cc create mode 100644 storage/sparrow/engine/context.h create mode 100644 storage/sparrow/engine/exception.h create mode 100644 storage/sparrow/engine/fileutil.cc create mode 100644 storage/sparrow/engine/fileutil.h create mode 100644 storage/sparrow/engine/flush.cc create mode 100644 storage/sparrow/engine/flush.h create mode 100644 storage/sparrow/engine/hash.h create mode 100644 storage/sparrow/engine/internalapi.cc create mode 100644 storage/sparrow/engine/internalapi.h create mode 100644 storage/sparrow/engine/interval.h create mode 100644 storage/sparrow/engine/intervaltree.h create mode 100644 storage/sparrow/engine/io.cc create mode 100644 storage/sparrow/engine/io.h create mode 100644 storage/sparrow/engine/list.h create mode 100644 storage/sparrow/engine/listener.cc create mode 100644 storage/sparrow/engine/listener.h create mode 100644 storage/sparrow/engine/lock.h create mode 100644 storage/sparrow/engine/log.h create mode 100644 storage/sparrow/engine/master.cc create mode 100644 storage/sparrow/engine/master.h create mode 100644 storage/sparrow/engine/misc.h create mode 100644 storage/sparrow/engine/partition.h create mode 100644 storage/sparrow/engine/persistent.cc create mode 100644 storage/sparrow/engine/persistent.h create mode 100644 storage/sparrow/engine/purge.cc create mode 100644 storage/sparrow/engine/purge.h create mode 100644 
storage/sparrow/engine/queue.h create mode 100644 storage/sparrow/engine/scheduler.cc create mode 100644 storage/sparrow/engine/scheduler.h create mode 100644 storage/sparrow/engine/search.h create mode 100644 storage/sparrow/engine/sema.h create mode 100644 storage/sparrow/engine/serial.cc create mode 100644 storage/sparrow/engine/serial.h create mode 100644 storage/sparrow/engine/socketutil.cc create mode 100644 storage/sparrow/engine/socketutil.h create mode 100644 storage/sparrow/engine/sort.cc create mode 100644 storage/sparrow/engine/sort.h create mode 100644 storage/sparrow/engine/thread.cc create mode 100644 storage/sparrow/engine/thread.h create mode 100644 storage/sparrow/engine/transient.cc create mode 100644 storage/sparrow/engine/transient.h create mode 100644 storage/sparrow/engine/treeorder.cc create mode 100644 storage/sparrow/engine/treeorder.h create mode 100644 storage/sparrow/engine/types.cc create mode 100644 storage/sparrow/engine/types.h create mode 100644 storage/sparrow/engine/vec.h create mode 100644 storage/sparrow/functions/functions.cc create mode 100644 storage/sparrow/functions/functions.h create mode 100644 storage/sparrow/functions/ipaddress.cc create mode 100644 storage/sparrow/functions/ipaddress.h create mode 100644 storage/sparrow/handler/field.cc create mode 100644 storage/sparrow/handler/field.h create mode 100644 storage/sparrow/handler/hasparrow.cc create mode 100644 storage/sparrow/handler/hasparrow.h create mode 100644 storage/sparrow/handler/plugin.cc create mode 100644 storage/sparrow/handler/plugin.h create mode 100644 storage/sparrow/sparrow.ini create mode 100644 storage/sparrow/udf/CMakeLists.txt create mode 100644 storage/sparrow/udf/operator.cc create mode 100644 storage/sparrow/udf/operator.h create mode 100644 storage/sparrow/udf/udf.cc create mode 100644 storage/sparrow/udf/udf.def create mode 100644 storage/sparrow/udf/udf.h create mode 100644 storage/sparrow/udf/udfargument.cc create mode 100644 storage/sparrow/udf/udfargument.h diff --git a/.gitignore b/.gitignore index b453d8b56780..e7d98ef9c056 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,7 @@ scalability_jobs_* .cproject .project .settings/ + +*.bak + +_build/ \ No newline at end of file diff --git a/client/mysql_config_editor.cc b/client/mysql_config_editor.cc index 7950a7972085..60fc84dd013e 100644 --- a/client/mysql_config_editor.cc +++ b/client/mysql_config_editor.cc @@ -501,7 +501,13 @@ static int set_command(void) { init_dynamic_string(&path_buf, "", MY_LINE_MAX); init_dynamic_string(&file_buf, "", file_size); - if (tty_password) opt_password = get_tty_password(NullS); + if (tty_password) + { + if (get_istty_stdin() == 1) + opt_password= get_tty_password_fromstdin(); + else + opt_password= get_tty_password(NullS); + } if (file_size) { if (read_and_decrypt_file(&file_buf) == -1) goto error; diff --git a/include/my_sys.h b/include/my_sys.h index 5a5fc3a7d34e..f020b4ed8ad7 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -643,6 +643,7 @@ extern int nt_share_delete(const char *name, myf MyFlags); #ifdef _WIN32 /* Windows-only functions (CRT equivalents)*/ extern HANDLE my_get_osfhandle(File fd); +extern File my_get_filedescr(HANDLE handle, int oflag); extern void my_osmaperr(unsigned long last_error); #endif diff --git a/include/mysql.h.pp b/include/mysql.h.pp index f490355e7ead..2b4a6332e959 100644 --- a/include/mysql.h.pp +++ b/include/mysql.h.pp @@ -273,6 +273,8 @@ bool generate_sha256_scramble(unsigned char *dst, size_t dst_size, const char *src, size_t 
src_size, const char *rnd, size_t rnd_size); +bool get_istty_stdin(void); +char *get_tty_password_fromstdin(void); char *get_tty_password(const char *opt_message); const char *mysql_errno_to_sqlstate(unsigned int mysql_errno); bool my_thread_init(void); diff --git a/include/mysql_com.h b/include/mysql_com.h index b377c77b7076..063e92d5aad4 100644 --- a/include/mysql_com.h +++ b/include/mysql_com.h @@ -1162,6 +1162,8 @@ bool generate_sha256_scramble(unsigned char *dst, size_t dst_size, #ifdef __cplusplus extern "C" { #endif +bool get_istty_stdin(void); +char *get_tty_password_fromstdin(void); char *get_tty_password(const char *opt_message); #ifdef __cplusplus } diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index f3669e2a2637..3204acfdb6ba 100644 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -156,6 +156,8 @@ SET(CLIENT_API_FUNCTIONS # Once the decision is taken to have documentation we need to move them to # CLIENT_API_FUNCTIONS list. SET(CLIENT_API_FUNCTIONS_UNDOCUMENTED + get_istty_stdin + get_tty_password_fromstdin get_tty_password # my_load_defaults is a wrapper for load_defaults and it is not documented. # We will have a FR to replace this for decent name/functionality and diff --git a/mysys/my_winfile.cc b/mysys/my_winfile.cc index 33d352fe71a8..82f5ccc27134 100644 --- a/mysys/my_winfile.cc +++ b/mysys/my_winfile.cc @@ -410,6 +410,13 @@ HANDLE my_get_osfhandle(File fd) { return GetHandleInfo(fd).handle; } +File my_get_filedescr(HANDLE handle, int oflag) { + DBUG_TRACE; + + return RegisterHandle(handle, oflag); +} + + /** Homegrown posix emulation for Windows. diff --git a/sql-common/get_password.cc b/sql-common/get_password.cc index c14988ae9931..d7b9f88c6ac7 100644 --- a/sql-common/get_password.cc +++ b/sql-common/get_password.cc @@ -73,6 +73,44 @@ #define getpass(A) getpassphrase(A) #endif +bool get_istty_stdin(void) +{ +#if defined (_WIN32) + if ( isatty(_fileno(stdin))) + return 0; +#elif defined (__unix) + if ( isatty(STDIN_FILENO)) + return 0; +#else + #error Environment not supported +#endif + return 1; +} + +char *get_tty_password_fromstdin(void) +{ + int c = 0; + int i=0; + char buff[80]; + + DBUG_ENTER("get_tty_password_fromstdin"); + memset(buff,0,sizeof(buff)); +#if defined(_WIN32) + while ((c = getc (stdin)) != EOF && !isspace (c) && c!= '\r') + { + buff[i++] = (char)c; + } +#elif defined (__unix) + while ((c = getc (stdin)) != EOF && !isspace (c) && c!= '\r' && c != '\n') + { + buff[i++] = (char)c; + } +#else +#error Environment not supported +#endif + DBUG_RETURN(my_strdup(PSI_NOT_INSTRUMENTED, buff, MYF(MY_FAE))); +} + #if defined(_WIN32) /* were just going to fake it here and get input from the keyboard */ char *get_tty_password(const char *opt_message) { diff --git a/sql/field.cc b/sql/field.cc index 06a48ea7a412..8010dd359956 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -5232,6 +5232,23 @@ bool Field_timestampf::get_date_internal_at(const Time_zone *tz, my_timestamp_from_binary(&tm, ptr, dec); if (tm.m_tv_sec == 0) return true; tz->gmt_sec_to_TIME(ltime, tm); + + TimeCache* cache = (table != 0 && table->file != nullptr) ? 
&table->file->timeCache_ : nullptr; + if (cache != nullptr && tm.m_tv_sec == cache->seconds_ && tz == cache->tz_) { + *ltime = cache->mtime_; + ltime->second_part = tm.m_tv_usec; + } else { + THD* thd = current_thd; + thd->time_zone()->gmt_sec_to_TIME(ltime, tm); + + if (cache != nullptr) { + cache->seconds_ = tm.m_tv_sec; + cache->tz_ = tz; + cache->mtime_ = *ltime; + cache->mtime_.second_part = 0; + } + } + return false; } diff --git a/sql/handler.h b/sql/handler.h index ac761b0b7525..f14e8e04e473 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -58,6 +58,7 @@ #include "my_inttypes.h" #include "my_io.h" #include "my_sys.h" +#include "mysql_time.h" #include "my_table_map.h" #include "my_thread_local.h" // my_errno #include "mysql/components/services/bits/psi_table_bits.h" @@ -670,6 +671,7 @@ enum legacy_db_type { DB_TYPE_PERFORMANCE_SCHEMA, DB_TYPE_TEMPTABLE, DB_TYPE_FIRST_DYNAMIC = 42, + DB_TYPE_SPARROW=77, DB_TYPE_DEFAULT = 127 // Must be last }; @@ -3928,6 +3930,17 @@ class ha_statistics { table_in_mem_estimate(IN_MEMORY_ESTIMATE_UNKNOWN) {} }; +class Time_zone; // Defined in sql/tztime.h + +struct TimeCache { + uint32 seconds_; + MYSQL_TIME mtime_; + const Time_zone* tz_; + TimeCache() : seconds_(0), tz_(0) { + memset( &mtime_, 0, sizeof(mtime_) ); + } +}; + /** Calculates length of key. @@ -4429,6 +4442,8 @@ class handler { /** Pointer to duplicate row */ uchar *dup_ref; + TimeCache timeCache_; + ha_statistics stats; /* MultiRangeRead-related members: */ diff --git a/sql/item_create.cc b/sql/item_create.cc index 89d5d3b05d7f..085e96c0838a 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -1678,6 +1678,23 @@ static const std::pair func_array[] = { {"WEEKDAY", SQL_FACTORY(Weekday_instantiator)}, {"WEEKOFYEAR", SQL_FACTORY(Weekofyear_instantiator)}, {"YEARWEEK", SQL_FACTORY(Yearweek_instantiator)}, + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Start of InfoVista functions + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + {"IPTOSTR", SQL_FN(Item_func_iptostr, 1)}, + {"STRTOIP", SQL_FN(Item_func_strtoip, 1)}, + {"MASKIP", SQL_FN(Item_func_maskip, 1)}, + {"GETIPMASK", SQL_FN(Item_func_getipmask, 1)}, + {"ISIPPRIVATE", SQL_FN(Item_func_isipprivate, 1)}, + {"GETNEWEST", SQL_FN(Item_func_getnewest, 2)}, + {"GETOLDEST", SQL_FN(Item_func_getoldest, 2)}, + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // End of InfoVista functions + ////////////////////////////////////////////////////////////////////////////////////////////////////// + {"GET_DD_COLUMN_PRIVILEGES", SQL_FN_INTERNAL(Item_func_get_dd_column_privileges, 3)}, {"GET_DD_INDEX_SUB_PART_LENGTH", diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 29aae44486b4..b4ce8990ad06 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -1772,4 +1772,74 @@ class Item_func_internal_get_dd_column_extra final : public Item_str_func { String *val_str(String *) override; }; +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Start of InfoVista functions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Item_func_iptostr final : public Item_str_func { + String tmp_value; + +public: + Item_func_iptostr(const POS &pos, Item *a) : Item_str_func(pos, a) {} + String* val_str(String* str) override; + bool resolve_type(THD *thd) override; 
+ const char* func_name() const override { return "iptostr"; } +}; + +class Item_func_strtoip final : public Item_str_func { + String tmp_value; + +public: + Item_func_strtoip(const POS &pos, Item *a) : Item_str_func(pos, a) {} + String* val_str(String* str) override; + bool resolve_type(THD *thd) override; + const char* func_name() const override { return "strtoip"; } +}; + +class Item_func_maskip final : public Item_str_func { + String tmp_value; + +public: + Item_func_maskip(const POS &pos, Item *a) : Item_str_func(pos, a) {} + String* val_str(String* str) override; + bool resolve_type(THD *thd) override; + const char* func_name() const override { return "maskip"; } +}; + +class Item_func_getipmask final : public Item_str_func { + String tmp_value; + +public: + Item_func_getipmask(const POS &pos, Item *a) : Item_str_func(pos, a) {} + String* val_str(String* str) override; + bool resolve_type(THD *thd) override; + const char* func_name() const override { return "getipmask"; } +}; + +class Item_func_isipprivate : public Item_int_func { +public: + Item_func_isipprivate(const POS &pos, Item *a) : Item_int_func(pos, a) {} + bool resolve_type(THD *thd) override; + const char* func_name() const override { return "isipprivate"; } + bool is_bool_func() const override { return true; } + uint decimal_precision() const override { return 1; } + + // void fix_length_and_dec() { + // maybe_null = 1; + // decimals = 0; + // max_length = 1; + // } + bool val_bool() override; + longlong val_int() override { + return (longlong)val_bool(); + } +}; + + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// End of InfoVista functions +////////////////////////////////////////////////////////////////////////////////////////////////////// + #endif /* ITEM_STRFUNC_INCLUDED */ diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 1879831add5d..cb0ec7f04951 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -1688,6 +1688,84 @@ class Item_func_internal_check_time final : public Item_datetime_func { bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; }; +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Start of InfoVista functions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Item_func_tadjust : public Item_int_func { +protected: + + const interval_type intervalType_; + const int nArg_; + const int fdowArg_; + +private: + + int getN(); + int getFdow(); + longlong getSeconds(MYSQL_TIME* t); + +public: + + Item_func_tadjust(const POS &pos, Item* arg1, + interval_type intervalType, int nArg, int fdowArg) + : Item_int_func(pos, arg1), + intervalType_(intervalType), nArg_(nArg), fdowArg_(fdowArg) { + } + + Item_func_tadjust(const POS &pos, Item* arg1, Item* arg2, + interval_type intervalType, int nArg, int fdowArg) + : Item_int_func(pos, arg1, arg2), + intervalType_(intervalType), nArg_(nArg), fdowArg_(fdowArg) { + } + const char* func_name() const override { + return intervalType_ == INTERVAL_WEEK ? 
"tadjustw" : "tadjust"; + } + + longlong val_int() override; + bool resolve_type(THD *thd) override; +}; + + +class Item_func_tnext : public Item_int_func { +protected: + + const interval_type intervalType_; + +public: + + Item_func_tnext(const POS &pos, Item* arg1, Item* arg2, interval_type intervalType) + : Item_int_func(pos, arg1, arg2), + intervalType_(intervalType) { + } + const char* func_name() const override { return "tnext"; } + longlong val_int() override; + bool resolve_type(THD *thd) override; +}; + + +class Item_func_getnewest : public Item_int_func { +public: + Item_func_getnewest(const POS &pos, Item *a, Item *b) + : Item_int_func(pos, a, b) { + } + const char* func_name() const override { return "getnewest"; } + longlong val_int() override; + bool resolve_type(THD *thd) override; +}; + +class Item_func_getoldest : public Item_int_func { +public: + Item_func_getoldest(const POS &pos, Item *a, Item *b) + : Item_int_func(pos, a, b) { + } + const char* func_name() const override { return "getoldest"; } + longlong val_int() override; + bool resolve_type(THD *thd) override; +}; + + + /* Function prototypes */ bool make_date_time(Date_time_format *format, MYSQL_TIME *l_time, diff --git a/sql/lex.h b/sql/lex.h index 5cfde4ebca5a..8eb70070f5dc 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -866,6 +866,9 @@ static const SYMBOL symbols[] = { {SYM_FN("SUM", SUM_SYM)}, {SYM_FN("SYSDATE", SYSDATE)}, {SYM_FN("SYSTEM_USER", USER)}, + {SYM_FN("TADJUST", TADJUST_SYM)}, + {SYM_FN("TADJUSTW", TADJUSTW_SYM)}, + {SYM_FN("TNEXT", TNEXT_SYM)}, {SYM_FN("TRIM", TRIM)}, {SYM_FN("VARIANCE", VARIANCE_SYM)}, {SYM_FN("VAR_POP", VARIANCE_SYM)}, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e176b83b17e6..0c12432ca6f0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -879,6 +879,7 @@ MySQL clients support the protocol: #include "sql_common.h" // mysql_client_plugin_init #include "sql_string.h" #include "storage/myisam/ha_myisam.h" // HA_RECOVER_OFF +#include "storage/sparrow/handler/hasparrow.h" #include "storage/perfschema/pfs_services.h" #include "thr_lock.h" #include "thr_mutex.h" @@ -2602,6 +2603,8 @@ static void clean_up(bool print_message) { if (set_server_shutting_down()) return; + Sparrow::SparrowHandler::stop_slave_threads(); + ha_pre_dd_shutdown(); dd::shutdown(); @@ -7880,6 +7883,9 @@ int mysqld_main(int argc, char **argv) set_ports(); if (init_server_components()) unireg_abort(MYSQLD_ABORT_EXIT); + + sql_print_information("%s (mysqld %s) starting as process %lu ...", + my_progname, server_version, (ulong) getpid()); if (!server_id_supplied) LogErr(INFORMATION_LEVEL, ER_WARN_NO_SERVERID_SPECIFIED); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index d4484824a1fd..1e84a16a9626 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1399,6 +1399,10 @@ void warn_on_deprecated_user_defined_collation( %token URL_SYM 1202 /* MYSQL */ %token GENERATE_SYM 1203 /* MYSQL */ +%token TADJUST_SYM 1204 /* Infovista */ +%token TADJUSTW_SYM 1205 /* Infovista */ +%token TNEXT_SYM 1206 /* Infovista */ + /* Precedence rules used to resolve the ambiguity when using keywords as idents in the case e.g.: @@ -11150,6 +11154,30 @@ sum_expr: { $$= NEW_PTN Item_sum_std(@$, $3, 0, $5); } + | TADJUST_SYM '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (YYTHD->mem_root) Item_func_tadjust(@$, $3,$6,$7,1,-1); + if ($$ == NULL) + MYSQL_YYABORT; + } + | TNEXT_SYM '(' expr ',' INTERVAL_SYM expr interval ')' + { + $$= new (YYTHD->mem_root) Item_func_tnext(@$, $3,$6,$7); + if ($$ == NULL) + MYSQL_YYABORT; + } + | 
TADJUSTW_SYM '(' expr ')' + { + $$= new (YYTHD->mem_root) Item_func_tadjust(@$, $3,INTERVAL_WEEK,-1,-1); + if ($$ == NULL) + MYSQL_YYABORT; + } + | TADJUSTW_SYM '(' expr ',' expr ')' + { + $$= new (YYTHD->mem_root) Item_func_tadjust(@$, $3,$5,INTERVAL_WEEK,-1,1); + if ($$ == NULL) + MYSQL_YYABORT; + } | VARIANCE_SYM '(' in_sum_expr ')' opt_windowing_clause { $$= NEW_PTN Item_sum_variance(@$, $3, 0, $5); diff --git a/storage/sparrow/CMakeLists.txt b/storage/sparrow/CMakeLists.txt new file mode 100644 index 000000000000..07798088a2d0 --- /dev/null +++ b/storage/sparrow/CMakeLists.txt @@ -0,0 +1,199 @@ +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(SPARROW_LIBS aio) +ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows") + SET(SPARROW_LIBS dnsapi) +ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + SET(SPARROW_LIBS nsl resolv) +ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + SET(SPARROW_LIBS resolv) +ENDIF() + +SET(SPARROW_SOURCES + handler/field.h + handler/hasparrow.h + handler/plugin.h + handler/hasparrow.cc + handler/field.cc + handler/plugin.cc + engine/alter.h + engine/atomic.h + engine/binbuffer.h + engine/cache.h + engine/coalescing.h + engine/compress.h + engine/cond.h + engine/condition.h + engine/context.h + engine/exception.h + engine/fileutil.h + engine/flush.h + engine/hash.h + engine/internalapi.h + engine/interval.h + engine/intervaltree.h + engine/io.h + engine/list.h + engine/listener.h + engine/lock.h + engine/log.h + engine/master.h + engine/misc.h + engine/partition.h + engine/persistent.h + engine/purge.h + engine/queue.h + engine/scheduler.h + engine/search.h + engine/sema.h + engine/serial.h + engine/socketutil.h + engine/sort.h + engine/thread.h + engine/transient.h + engine/treeorder.h + engine/types.h + engine/vec.h + engine/condition.cc + engine/context.cc + engine/master.cc + engine/fileutil.cc + engine/internalapi.cc + engine/serial.cc + engine/types.cc + engine/scheduler.cc + engine/treeorder.cc + engine/transient.cc + engine/flush.cc + engine/persistent.cc + engine/sort.cc + engine/listener.cc + engine/thread.cc + engine/cache.cc + engine/socketutil.cc + engine/alter.cc + engine/purge.cc + engine/binbuffer.cc + engine/compress.cc + engine/coalescing.cc + engine/io.cc + functions/functions.h + functions/ipaddress.h + functions/functions.cc + functions/ipaddress.cc + dns/dns.h + dns/dnscache.h + dns/dnsconfiguration.h + dns/dnsdefault.h + dns/dnsnet.h + dns/dnsserver.h + dns/dns.cc + dns/dnsnet.cc + dns/dnscache.cc + dns/dnsdefault.cc + dns/dnsconfiguration.cc + dns/dnsserver.cc) + + +ADD_DEFINITIONS(-DLOG_SUBSYSTEM_TAG="Sparrow") + +MYSQL_ADD_PLUGIN(sparrow ${SPARROW_SOURCES} + STORAGE_ENGINE DEFAULT + MODULE_OUTPUT_NAME ha_sparrow + LINK_LIBRARIES ${SPARROW_LIBS} extra::rapidjson ext::zlib +) +# LINK_LIBRARIES ${SPARROW_LIBS} extra::rapidjson ext::zlib mysys + +IF(UNIX) + IF(MY_COMPILER_IS_GNU) + STRING_APPEND(CMAKE_CXX_FLAGS " -Wno-unused-parameter") + ENDIF() +ELSE() + STRING_APPEND(CMAKE_CXX_FLAGS " /wd4101") +ENDIF() + +SET(SPARROW_API_SOURCES + api/api_assert.h + api/atomic.h + api/bufferlist.cc + api/bufferlist.h + api/compress.cc + api/compress.h + api/cond.h + api/memalloc.cc + api/memalloc.h + api/hash.h + api/interval.h + api/intervaltree.h + api/ipaddress.cc + api/ipaddress.h + api/list.h + api/lock.h + api/misc.h + api/sema.h + api/serial.cc + api/serial.h + api/socketutil.cc + api/socketutil.h + api/spw_connection.cc + api/spw_connection.h + api/spw_master.cc + api/spw_master.h + api/spw_sparrowbuffer.cc + api/spw_sparrowbuffer.h + api/spw_table.cc + api/spw_table.h 
+ api/spw_types.cc + api/spw_types.h + api/str.h + api/thread.cc + api/thread.h + api/vec.h + api/include/connection.h + api/include/global.h + api/include/master.h + api/include/exception.h + api/include/exceptwrapper.h + api/include/sparrowbuffer.h + api/include/table.h + api/include/types.h) + +SET(SPARROW_API_FUNCTIONS + initialize + createConnect + releaseConnect + errmsg + + CACHE INTERNAL "Functions exported by Sparrow API") + +ADD_CONVENIENCE_LIBRARY(sparrow_api ${SPARROW_API_SOURCES} + LINK_LIBRARIES ext::zlib +) +TARGET_INCLUDE_DIRECTORIES(sparrow_api PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/api/include) +MESSAGE(STATUS "[Sparrow] Current value of CMAKE_CURRENT_SOURCE_DIR is ${CMAKE_CURRENT_SOURCE_DIR}") + +ADD_DEFINITIONS(-DSPARROW_API_EXPORTS -DHAVE_YASSL) + +# BPL: The MERGE_LIBRARIES does not exist anymore. Anyway, i'm not sure this is mandatory. Let's see how the build goes without that macro call. +# MERGE_LIBRARIES(sparrowapi SHARED sparrow_api mysqlclient EXPORTS ${SPARROW_API_FUNCTIONS}) + +# MERGE_CONVENIENCE_LIBRARIES(mysqlclient ${LIBS_TO_MERGE} + # COMPONENT Development + # LINK_LIBRARIES ${LIBS_TO_LINK} +# ) + +# Merge several convenience libraries into one big mysqlclient and link them together into shared library. +MERGE_LIBRARIES_SHARED(sparrowapi sparrow_api mysqlclient + EXPORTS + ${SPARROW_API_FUNCTIONS} +) + + + +ADD_SUBDIRECTORY(api_test) +# Missing dependency on api/include + +ADD_SUBDIRECTORY(udf) + +# IF(CMAKE_SYSTEM_NAME MATCHES "SunOS") +# SET_TARGET_PROPERTIES(sparrowapi PROPERTIES LINK_FLAGS "${LINK_FLAGS} -R $ORIGIN/../lib") +# ENDIF() diff --git a/storage/sparrow/api/api_assert.h b/storage/sparrow/api/api_assert.h new file mode 100644 index 000000000000..432ecb89e445 --- /dev/null +++ b/storage/sparrow/api/api_assert.h @@ -0,0 +1,83 @@ +#ifndef _spw_api_assert_h +#define _spw_api_assert_h + +#include + +#include "my_compiler.h" + + +/* +** Generic Macro +*/ + +inline void spwAssertionFailure (const char* expr, const char* filename, int lineno) +{ + const char* text = "assertion failed: %s, in file %s, line %d\n"; + printf(text, expr, filename, lineno); +#ifdef _WIN32 + DebugBreak(); +#endif + abort(); +} + +#define _SPW_ASSERT(a) do { if ((a) == 0) spwAssertionFailure (#a, __FILE__, __LINE__); } while (0) + +void PRINT_DBUG(const char* format, ...) MY_ATTRIBUTE((format(printf, 1, 2))); +void PRINT_INFO(const char* format, ...) MY_ATTRIBUTE((format(printf, 1, 2))); +void PRINT_WARN(const char* format, ...) MY_ATTRIBUTE((format(printf, 1, 2))); +void PRINT_ERR(const char* format, ...) MY_ATTRIBUTE((format(printf, 1, 2))); + +inline void PRINT_WARN(const char* format, ...) { + va_list args; + va_start(args, format); + fprintf(stdout, "warn: "); + vfprintf(stdout, format, args); + fprintf(stdout, "\n"); + va_end(args); +} + +inline void PRINT_ERR(const char* format, ...) { + va_list args; + va_start(args, format); + fprintf(stdout, "error: "); + vfprintf(stdout, format, args); + fprintf(stdout, "\n"); + va_end(args); +} + +#ifdef NDEBUG + +#define PRINT_DBUG(...) +#define PRINT_INFO(...) +#define SPW_ASSERT(a) +#define SPW_dbgASSERT(a) +#define SPW_relASSERT(a) _SPW_ASSERT(a) + +#else + +inline void PRINT_DBUG(const char* format, ...) { + va_list args; + va_start(args, format); + fprintf(stdout, "debug: "); + vfprintf(stdout, format, args); + fprintf(stdout, "\n"); + va_end(args); +} + +inline void PRINT_INFO(const char* format, ...) 
{
+  va_list args;
+  va_start(args, format);
+  fprintf(stdout, "info: ");
+  vfprintf(stdout, format, args);
+  fprintf(stdout, "\n");
+  va_end(args);
+}
+
+#define SPW_ASSERT(a) _SPW_ASSERT(a)
+#define SPW_dbgASSERT(a) _SPW_ASSERT(a)
+#define SPW_relASSERT(a) _SPW_ASSERT(a)
+
+#endif
+
+
+#endif // _spw_api_assert_h
diff --git a/storage/sparrow/api/atomic.h b/storage/sparrow/api/atomic.h
new file mode 100644
index 000000000000..0073b1042ca1
--- /dev/null
+++ b/storage/sparrow/api/atomic.h
@@ -0,0 +1,190 @@
+/*
+ Simple atomic operations.
+*/
+
+#ifndef _spw_api_atomic_h_
+#define _spw_api_atomic_h_
+
+#ifdef _WIN32
+#include <windows.h>
+#include <intrin.h>
+#if (_MSC_VER >= 1500)
+// This one can be intrinsic only with Visual Studio 2008.
+#pragma intrinsic(_InterlockedAdd)
+#endif
+#pragma intrinsic(_InterlockedExchangeAdd)
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+#pragma intrinsic(_InterlockedCompareExchange)
+#elif defined(__SunOS)
+#include <atomic.h>
+#endif
+#include "include/global.h"
+
+namespace Sparrow {
+
+class Atomic {
+private:
+
+  Atomic();
+
+public:
+
+#ifdef _WIN64
+  static uint32_t add32(volatile uint32_t* target, const int32_t delta) {
+    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(target), delta);
+  }
+  static uint64_t add64(volatile uint64_t* target, const int64_t delta) {
+    return _InterlockedExchangeAdd64(reinterpret_cast<volatile __int64*>(target), delta);
+  }
+  static uint32_t inc32(volatile uint32_t* target) {
+    return _InterlockedIncrement(reinterpret_cast<volatile long*>(target));
+  }
+  static uint32_t dec32(volatile uint32_t* target) {
+    return _InterlockedDecrement(reinterpret_cast<volatile long*>(target));
+  }
+  static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) {
+    return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(target), newval, cmp) == cmp;
+  }
+  static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) {
+    return _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(target), newval, cmp) == cmp;
+  }
+#elif defined(_WIN32)
+  static uint32_t add32(volatile uint32_t* target, const int32_t delta) {
+    return InterlockedExchangeAdd(reinterpret_cast<volatile long*>(target), delta) + delta;
+  }
+  // 64-bit interlocked functions for 32-bit platforms are available only in Vista,
+  // so use assembly code.
+  static uint64_t interlockedCompareExchange64(volatile uint64_t* target, const uint64_t value, const uint64_t comp) {
+    __asm {
+      mov esi, [target]
+      mov ebx, dword ptr [value]
+      mov ecx, dword ptr [value + 4]
+      mov eax, dword ptr [comp]
+      mov edx, dword ptr [comp + 4]
+      lock cmpxchg8b [esi]
+    }
+  }
+  static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) {
+    return interlockedCompareExchange64(target, newval, cmp) == cmp;
+  }
+  static uint64_t add64(volatile uint64_t* target, const int64_t delta) {
+    uint64_t old;
+    do {
+      old = *target;
+    } while (!cas64(target, old, old + delta));
+    return old + delta;
+  }
+  static uint32_t inc32(volatile uint32_t* target) {
+    return InterlockedIncrement(reinterpret_cast<volatile long*>(target));
+  }
+  static uint32_t dec32(volatile uint32_t* target) {
+    return InterlockedDecrement(reinterpret_cast<volatile long*>(target));
+  }
+  static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) {
+    return InterlockedCompareExchange(reinterpret_cast<volatile long*>(target), newval, cmp) == cmp;
+  }
+#elif defined(__SunOS) // Solaris.
+ static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return atomic_add_32_nv(target, delta); + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + return atomic_add_64_nv((volatile uint64_t*)target, delta); + } + static uint32_t inc32(volatile uint32_t* target) { + return atomic_inc_32_nv(target); + } + static uint32_t dec32(volatile uint32_t* target) { + return atomic_dec_32_nv(target); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return atomic_cas_32(target, cmp, newval) == cmp; + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return atomic_cas_64(reinterpret_cast(target), cmp, newval) == cmp; + } +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) // gcc or Intel Compiler. + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return __sync_add_and_fetch(target, delta); + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + return __sync_add_and_fetch(target, delta); + } + static uint32_t inc32(volatile uint32_t* target) { + return __sync_add_and_fetch(target, 1); + } + static uint32_t dec32(volatile uint32_t* target) { + return __sync_sub_and_fetch(target, 1); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return __sync_bool_compare_and_swap(target, cmp, newval); + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return __sync_bool_compare_and_swap(target, cmp, newval); + } +#elif defined(__x86_64__) // x64 support. + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + uint32_t result=0; + asm volatile ("lock; xaddl %0, %1" + : "=r"(result), "=m"(*target) + : "0"(delta), "m"(*target) + : "memory", "cc"); + return result + delta; + } + + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + uint64_t temp = static_cast(delta); + asm volatile("lock; xaddq %0,%1" + : "+r" (temp), "+m" (*target) + : : "memory"); + return temp + delta; + } + static uint32_t inc32(volatile uint32_t* target) { + return add32(target, 1); + } + static uint32_t dec32(volatile uint32_t* target) { + return add32(target, -1); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + uint32_t result; + asm volatile ("lock; cmpxchgl %1, %2" + : "=a" (result) + : "r" (newval), "m" (*target), "0" (cmp) + : "memory"); + return result == newval; + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + uint64_t result; + asm volatile("lock; cmpxchgq %1,%2" + : "=a" (result) + : "q" (newval), "m" (*target), "0" (cmp) + : "memory"); + return result == newval; + } +#else +#error Missing atomic functions +#endif + static void set64(volatile uint64_t* target, const uint64_t newVal) { + uint64_t old; + do { + old = *target; + } while (!cas64(target, old, newVal)); + } + static uint64_t get64(volatile uint64_t* target) { + uint64_t result; + do { + result = *target; + } while (!cas64(target, result, result)); + return result; + } + static uint64_t inc64(volatile uint64_t* target) { + return add64(target, 1); + } + static uint64_t dec64(volatile uint64_t* target) { + return add64(target, -1); + } +}; + +} + +#endif /* #ifndef _spw_api_atomic_h_ */ diff --git a/storage/sparrow/api/bufferlist.cc b/storage/sparrow/api/bufferlist.cc new file mode 100644 index 000000000000..0096d8548214 --- /dev/null +++ 
b/storage/sparrow/api/bufferlist.cc @@ -0,0 +1,272 @@ +#include "str.h" +#include "bufferlist.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BufferList +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Constructor for writable BufferList +BufferList::BufferList( uint32_t capacity, uint32_t size ) + : temp_(new uint8_t[8], 8) +{ + if ( capacity < size ) + throw SparrowException::create( false, SPW_API_BUFFER_FULL, "BufferList: illegal argument: capacity %u < size %u.", capacity, size ); + + capacity_ = capacity; + position_ = 0; + size_ = size; + buffer_ = -1; +} + +BufferList::~BufferList() +{ + uint8_t* temp = temp_.getData(); + if ( temp != NULL ) { + delete [] temp; + } +} + +void BufferList::clear() +{ + position_ = 0; + buffer_ = -1; + currentBuffer_.reset(); + buffers_.clear(); +} + +void BufferList::mark() +{ + savedPosition_ = position_; + savedBuffer_ = buffer_; + if ( buffer_ >= 0 ) { + savedBufferPosition_ = buffers_[buffer_]->position(); + } else { + savedBufferPosition_ = 0; + } +} + +void BufferList::reset() +{ + position_ = savedPosition_; + buffer_ = savedBuffer_; + while ( static_cast(buffers_.length()) > buffer_ + 1 ) { + buffers_.removeLast(); + } + if ( buffer_ >= 0 ) { + currentBuffer_ = buffers_[buffer_]; + currentBuffer_->position(savedBufferPosition_); + } else { + currentBuffer_.reset(); + } +} + +ByteBuffer& BufferList::makeRoom( uint32_t n ) _THROW_(SparrowException) +{ + if ( position_ + n > capacity_ ) + throw SparrowException::create( false, SPW_API_BUFFER_FULL, + "BufferList: Buffer overflow. Can't allocate %u more bytes." + "Current position %u, capacity %u", n, position_, capacity_ ); + + // Allocate one or more small buffers if necessary + uint32_t remaining = (currentBuffer_.get() == NULL ? 0 : currentBuffer_->remaining()); + while ( n > remaining ) { + RefByteBuffer b( new IOBuffer(size_) ); + buffers_.append( b ); + if ( n > size_ ) n-= size_; + else n = 0; + } + if ( remaining == 0 && (buffer_ + 1) < static_cast(buffers_.length()) ) { + buffer_++; + currentBuffer_ = buffers_[buffer_]; + } + + SPW_dbgASSERT(currentBuffer_.get() != NULL); + return *currentBuffer_; +} + +void BufferList::put( const ByteBuffer& value, bool extend ) +{ + uint32_t n = value.remaining(); + uint32_t saved = n; + ByteBuffer& buffer = (extend ? 
makeRoom(n) : *buffers_[buffer_]); + if ( buffer.remaining() >= n ) { + buffer << value; + } else { + uint32_t offset = 0; + while (true) { + ByteBuffer& b = *buffers_[buffer_]; + uint32_t length = std::min(b.remaining(), n); + b << ByteBuffer( value.getData() + offset, length ); + offset += length; + n -= length; + if ( n == 0 ) { + break; + } + buffer_++; + } + } + if ( extend ) { + position_ += saved; + } +} + +void BufferList::put(uint8_t value) +{ + ByteBuffer& buffer = makeRoom(1); + buffer << value; + position_ += 1; +} + +void BufferList::putShort(uint16_t value) +{ + ByteBuffer& buffer = makeRoom(2); + if ( buffer.remaining() >= 2 ) { + buffer << value; + } else { + ByteBuffer& temp = getTemp(); + uint32_t limit = temp.limit(); + temp << value; + temp.flip(); + put( temp, false ); + temp.limit( limit ); + } + position_ += 2; +} + +void BufferList::putInt(uint32_t value) +{ + ByteBuffer& buffer = makeRoom(4); + if ( buffer.remaining() >= 4 ) { + buffer << value; + } else { + ByteBuffer& temp = getTemp(); + uint32_t limit = temp.limit(); + temp << value; + temp.flip(); + put( temp, false ); + temp.limit( limit ); + } + position_ += 4; +} + +void BufferList::putLong(uint64_t value) +{ + ByteBuffer& buffer = makeRoom(8); + if ( buffer.remaining() >= 8 ) { + buffer << value; + } else { + ByteBuffer& temp = getTemp(); + uint32_t limit = temp.limit(); + temp << value; + temp.flip(); + put( temp, false ); + temp.limit( limit ); + } + position_ += 8; +} + +void BufferList::putDouble(double value) +{ + ByteBuffer& buffer = makeRoom(8); + if ( buffer.remaining() >= 8 ) { + buffer << value; + } else { + ByteBuffer& temp = getTemp(); + uint32_t limit = temp.limit(); + temp << value; + temp.flip(); + put( temp, false ); + temp.limit( limit ); + } + position_ += 8; +} + + +void BufferList::put(const ByteBuffer& value) +{ + put( value, true ); +} + +void BufferList::put(const uint8_t* value, uint32_t length) +{ + put( ByteBuffer(value, length), true ); +} + +void BufferList::put(const char* value) +{ + if ( !value ) return; + uint32_t length = static_cast(strlen(value)); + putInt( length ); + put( reinterpret_cast(value), length ); +} + +void BufferList::append( RefByteBuffer value ) +{ + buffers_.append( value ); + position_ += value->position(); +} + +void BufferList::append(const SYSvector& value) +{ + for ( uint32_t i=0; i& buffers ) _THROW_(SparrowException) +{ + if ( buffers.length() == 0 ) + throw SparrowException::create( false, SPW_API_FAILED, "ReadOnlyBufferList: illegal argument: empty buffers" ); + + position_ = 0; + buffer_ = -1; + currentData_ = NULL; + currentStart_ = UINT_MAX32; + currentEnd_ = 0; + + uint32_t capacity = 0; + uint32_t offset = 0; + for ( uint32_t i=0; i= 0 ) { + if ( pos >= currentStart_ && pos < currentEnd_ ) { + SPW_dbgASSERT(currentData_); + return currentData_[pos]; + } + } + + for ( buffer_=0; buffer_(buffers_.length()); ++buffer_ ) { + const ByteBuffer& buffer = buffers_[buffer_]; + uint32_t start = offsets_[buffer_]; + uint32_t end = start + buffer.position(); + if ( pos >= start && pos < end ) { + currentData_ = buffer.getData(); + currentStart_ = start; + currentEnd_ = end; + return currentData_[pos - start]; + } + } + throw SparrowException::create( false, SPW_API_FAILED, "ReadOnlyBufferList: Get(%u) out of range.", pos ); +} + + +} // namespace Sparrow diff --git a/storage/sparrow/api/bufferlist.h b/storage/sparrow/api/bufferlist.h new file mode 100644 index 000000000000..a7cba4dad34c --- /dev/null +++ b/storage/sparrow/api/bufferlist.h @@ -0,0 +1,101 
@@
+#ifndef _spw_api_bufferlist_h_
+#define _spw_api_bufferlist_h_
+
+#include "include/global.h"
+#include "vec.h"
+#include "serial.h"
+#include "misc.h"
+
+
+namespace Sparrow {
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// BufferList
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// List of byte buffers of fixed size (no overflow mechanism to automatically grow)
+class BufferList
+{
+private:
+  SYSvector<RefByteBuffer> buffers_;
+  SYSvector<uint32_t> offsets_;
+
+  uint32_t capacity_;  // Total size of all buffers in the list
+  uint32_t position_;
+  uint32_t size_;      // Size of each buffer in the list
+
+  int buffer_;         // index of current buffer
+  RefByteBuffer currentBuffer_;
+
+  int savedBuffer_;
+  uint32_t savedPosition_;
+  uint32_t savedBufferPosition_;
+
+  ByteBuffer temp_;
+
+  ByteBuffer& getTemp() {
+    temp_.position(0);
+    return temp_;
+  }
+
+  void put(const ByteBuffer& value, bool extend);
+
+public:
+  BufferList(uint32_t capacity, uint32_t size);
+  ~BufferList();
+
+  void clear();
+  ByteBuffer& makeRoom(uint32_t n) _THROW_(SparrowException);
+
+  // Copies data in our buffer list, allocating new buffers as required
+  void put(uint8_t value);
+  void putShort(uint16_t value);
+  void putInt(uint32_t value);
+  void putLong(uint64_t value);
+  void putDouble(double value);
+  void put(const ByteBuffer& value);
+  void put(const char* value);
+  void put(const uint8_t* value, uint32_t length);
+
+  // Appends ByteBuffer references to the end of our buffer list - no memory copy
+  void append(RefByteBuffer value);
+  void append(const SYSvector<RefByteBuffer>& value);
+
+  void mark();
+  void reset();
+
+  uint32_t getPosition() const { return position_; }
+
+  const SYSvector<RefByteBuffer>& getBuffers() const { return buffers_; }
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// ReadOnlyBufferList
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Makes a read-only copy of a list of buffers
+class ReadOnlyBufferList
+{
+private:
+  SYSvector<ByteBuffer> buffers_;
+  SYSvector<uint32_t> offsets_;
+
+  uint32_t capacity_;
+  uint32_t position_;
+
+  int buffer_;         // Current buffer
+  const uint8_t* currentData_;
+  uint32_t currentStart_;
+  uint32_t currentEnd_;
+
+public:
+  ReadOnlyBufferList(const SYSvector<ByteBuffer>& buffers) _THROW_(SparrowException);
+
+  uint8_t get(uint32_t pos) _THROW_(SparrowException);
+
+  uint32_t getPosition() const { return position_; }
+};
+
+} // namespace Sparrow
+
+#endif // #ifndef _spw_api_bufferlist_h_
diff --git a/storage/sparrow/api/compress.cc b/storage/sparrow/api/compress.cc
new file mode 100644
index 000000000000..b37caf7c1790
--- /dev/null
+++ b/storage/sparrow/api/compress.cc
@@ -0,0 +1,98 @@
+/*
+ Compression helpers.
+*/ + +#include "compress.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// LZJB +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define NBBY 8 +#define MATCH_BITS 6 +#define MATCH_MIN 3 +#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) +#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) +#define LEMPEL_SIZE 1024 + +// STATIC +size_t LZJB::compress(const uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len) { + const uint8_t* src = s_start; + uint8_t* dst = d_start; + const uint8_t* cpy; + uint8_t* copymap = 0; + int copymask = 1 << (NBBY - 1); + int mlen, offset, hash; + uint16_t* hp; + uint16_t lempel[LEMPEL_SIZE] = { 0 }; + while (src < s_start + s_len) { + if ((copymask <<= 1) == (1 << NBBY)) { + if (dst >= d_start + d_len - 1 - 2 * NBBY) { + return s_len; + } + copymask = 1; + copymap = dst; + *dst++ = 0; + } + if (src > s_start + s_len - MATCH_MAX) { + *dst++ = *src++; + continue; + } + hash = (src[0] << 16) + (src[1] << 8) + src[2]; + hash += hash >> 9; + hash += hash >> 5; + hp = &lempel[hash & (LEMPEL_SIZE - 1)]; + offset = (uint64_t)(src - *hp) & OFFSET_MASK; + *hp = static_cast((uint64_t)src); + cpy = src - offset; + if (cpy >= s_start && cpy != src && + src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { + *copymap |= copymask; + for (mlen = MATCH_MIN; mlen < MATCH_MAX; ++mlen) { + if (src[mlen] != cpy[mlen]) { + break; + } + } + *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY); + *dst++ = static_cast(offset); + src += mlen; + } else { + *dst++ = *src++; + } + } + return dst - d_start; +} + +// STATIC +int LZJB::decompress(const uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len) { + const uint8_t* src = s_start; + uint8_t* dst = d_start; + uint8_t* d_end = (uint8_t*)d_start + d_len; + uint8_t* cpy; + uint8_t copymap = 0; + int copymask = 1 << (NBBY - 1); + while (dst < d_end) { + if ((copymask <<= 1) == (1 << NBBY)) { + copymask = 1; + copymap = *src++; + } + if (copymap & copymask) { + int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; + int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; + src += 2; + if ((cpy = dst - offset) < (uint8_t *)d_start) { + return -1; + } + while (--mlen >= 0 && dst < d_end) { + *dst++ = *cpy++; + } + } else { + *dst++ = *src++; + } + } + return 0; +} + +} diff --git a/storage/sparrow/api/compress.h b/storage/sparrow/api/compress.h new file mode 100644 index 000000000000..c7a8350edb10 --- /dev/null +++ b/storage/sparrow/api/compress.h @@ -0,0 +1,26 @@ +/* + Compression helpers. +*/ + +#ifndef _engine_compress_h_ +#define _engine_compress_h_ + +#include "include/global.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// LZJB +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class LZJB { +public: + + static size_t compress(const uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len); + + static int decompress(const uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len); +}; + +} + +#endif /* #ifndef _engine_compress_h_ */ diff --git a/storage/sparrow/api/cond.h b/storage/sparrow/api/cond.h new file mode 100644 index 000000000000..5cc935ae9c1a --- /dev/null +++ b/storage/sparrow/api/cond.h @@ -0,0 +1,151 @@ +/* + Condition variable. 
+*/ + +#ifndef _spw_api_cond_h_ +#define _spw_api_cond_h_ + +#include "lock.h" +#include "mysql/psi/mysql_cond.h" +#include "my_systime.h" + +namespace Sparrow { + +class Cond { +private: + + const char *m_name; + native_cond_t cond_; + Lock* lock_; + uint32_t volatile nbWaiters_; + uint8_t owned_:1; + uint8_t static_:1; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + + void initialize() { + native_cond_init(&cond_); + } + + void clear() { + if (m_name != 0) { + native_cond_destroy(&cond_); + free(const_cast(m_name)); + if (owned_) { + delete lock_; + } + m_name = 0; + } + } + +public: + + Cond(const bool isStatic, const char* name) : lock_(new Lock(isStatic, name)), nbWaiters_(0), owned_(true), static_(isStatic) { + m_name = my_strdup(name, MYF(MY_FAE)); + if (static_) { + Cond::getStatics().append(this); + } else { + initialize(); + } + } + + Cond(const bool isStatic, Lock& lock, const char* name) : lock_(&lock), nbWaiters_(0), owned_(false), static_(isStatic) { + m_name = my_strdup(name, MYF(MY_FAE)); + if (static_) { + Cond::getStatics().append(this); + } else { + initialize(); + } + } + + Cond& operator = (const Cond&) = delete; + Cond(const Cond&) = delete; + + static void initializeStatics() { + SYSslistIterator iterator(Cond::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(Cond::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~Cond() { + clear(); + } + + Lock& getLock() { + return *lock_; + } + + void acquire() { + lock_->lock(); + } + + void release() { + lock_->unlock(); + } + + void signal(const bool acquired = false) { + if (!acquired) { + acquire(); + } + if (nbWaiters_ > 0) { + native_cond_signal(&cond_); + } + if (!acquired) { + release(); + } + } + + void signalAll(const bool acquired = false) { + if (!acquired) { + acquire(); + } + if (nbWaiters_ > 0) { + native_cond_broadcast(&cond_); + } + if (!acquired) { + release(); + } + } + + bool wait(const uint64_t milliseconds, const bool acquired = false) { + if (!acquired) { + acquire(); + } + nbWaiters_++; + int status; + if (milliseconds == 0) { + status = native_cond_wait(&cond_, lock_->get()); // Infinite wait. + } else { + struct timespec t; + const uint64_t nanoseconds = milliseconds * 1000000; + set_timespec_nsec(&t, nanoseconds); + status = native_cond_timedwait(&cond_, lock_->get(), &t); + } + nbWaiters_--; + if (!acquired) { + release(); + } + return (status == 0); + } + + bool wait(const bool acquired = false) { + return wait(0, acquired); + } +}; + +} + +#endif /* #ifndef _spw_api_cond_h_ */ diff --git a/storage/sparrow/api/hash.h b/storage/sparrow/api/hash.h new file mode 100644 index 000000000000..e9b7802d1027 --- /dev/null +++ b/storage/sparrow/api/hash.h @@ -0,0 +1,743 @@ +/* + Hash table types. 
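+
+  A minimal usage sketch (the Endpoint type and the template arguments written out below
+  are illustrative assumptions; elements only need a hash() method and operator== as used
+  by the templates in this header):
+
+    struct Endpoint {
+      uint32_t ip;
+      uint16_t port;
+      uint32_t hash() const { return ip ^ port; }
+      bool operator == (const Endpoint& o) const { return ip == o.ip && port == o.port; }
+    };
+
+    SYShash<Endpoint> seen(16);                 // 16 initial buckets, doubled as the table fills
+    seen.insert(Endpoint{0x7f000001, 80});
+    if (!seen.contains(Endpoint{0x7f000001, 443})) {
+      seen.insert(Endpoint{0x7f000001, 443});
+    }
+    SYShashIterator<Endpoint> it(seen);
+    while (++it) {
+      const Endpoint& e = it.key();             // visits each stored element once
+      (void)e;
+    }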
+ */ + +#ifndef _spw_api_hash_h_ +#define _spw_api_hash_h_ + +#include "vec.h" + +namespace Sparrow { + +// spread hash code +static inline uint32_t spreadHashCode(uint32_t h) { + h += ~(h << 9); + h ^= (h >> 14); + h += (h << 4); + h ^= (h >> 10); + return h; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYShlink +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYShlink { +public: + + SYShlink(const T& object, const uint32_t hash, SYShlink* next); + SYShlink* getNext() const; + void setNext(SYShlink* next); + const T& getObject() const; + T& getObject(); + void setObject(const T& object); + void setHash(const uint32_t hash); + uint32_t getHash() const; + +protected: + + uint32_t hash_; + SYShlink* next_; + T object_; +}; + +template inline SYShlink::SYShlink(const T& object, const uint32_t hash, SYShlink* next) : + hash_(hash), next_(next), object_(object) { +} + +template inline SYShlink* SYShlink::getNext() const { + return next_; +} + +template inline void SYShlink::setNext(SYShlink* next) { + next_ = next; +} + +template inline const T& SYShlink::getObject() const { + return object_; +} + +template inline T& SYShlink::getObject() { + return object_; +} + +template inline void SYShlink::setObject(const T& object) { + object_ = object; +} + +template inline void SYShlink::setHash(const uint32_t hash) { + hash_ = hash; +} + +template inline uint32_t SYShlink::getHash() const { + return hash_; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Default allocator for hash tables +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYShAllocator { +private: + + uint32_t n_; + +public: + + SYShAllocator() : n_(0) { + } + + SYShlink* acquire(const T& object, const uint32_t hash, SYShlink* next) { + n_++; + return new SYShlink(object, hash, next); + } + + void release(SYShlink* link) { + n_--; + delete link; + } +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Pool allocator for hash tables +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYShPoolAllocator { +private: + + SYShlink* root_; + +public: + + SYShPoolAllocator() : + root_(0) { + } + ~SYShPoolAllocator() { + SYShlink* link = root_; + while (link != 0) { + SYShlink* next = link->getNext(); + delete link; + link = next; + } + } + SYShlink* acquire(const T& object, const uint32_t hash, SYShlink* next) { + if (root_ == 0) { + return new SYShlink(object, hash, next); + } else { + SYShlink* link = root_; + root_ = root_->getNext(); + link->setObject(object); + link->setHash(hash); + link->setNext(next); + return link; + } + } + void release(SYShlink* link) { + link->setNext(root_); + root_ = link; + } +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYShashBase +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYShashBase { +protected: + + const uint32_t initial_; + SYSpVector , 0> vector_; + uint32_t items_; + +protected: + + void initialize(SYSpVector , 0>& vector); + uint32_t initialize(); + void extend(); + +public: + + SYShashBase(const uint32_t initial); + + // accessors + uint32_t entries() 
const; + bool isEmpty() const; + int64_t getSize() const; +}; + +template inline void SYShashBase::initialize(SYSpVector , 0>& vector) { + for (uint32_t i = 0; i < vector.length(); ++i) { + vector[i] = 0; + } +} + +template inline uint32_t SYShashBase::initialize() { + if (vector_.isEmpty()) { + vector_.reshape(initial_); + initialize(vector_); + } + return vector_.length(); +} + +template inline void SYShashBase::extend() { + if (!vector_.isEmpty()) { + SYSpVector , 0> vector; + const uint32_t buckets = vector_.length() * 2; + vector.reshape(buckets); + initialize(vector); + for (uint32_t i = 0; i < vector_.length(); ++i) { + SYShlink* sl = vector_[i]; + SYShlink* nsl = 0; + while (sl != 0) { + nsl = sl->getNext(); + const uint32_t h = sl->getHash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + sl->setNext(vector[bucket]); + vector[bucket] = sl; + sl = nsl; + } + } + vector_ = vector; + } +} + +template inline uint32_t SYShashBase::entries() const { + return items_; +} + +template inline bool SYShashBase::isEmpty() const { + return (entries() == 0); +} + +template inline int64_t SYShashBase::getSize() const { + return entries() * sizeof(SYShlink); +} + +template inline SYShashBase::SYShashBase(const uint32_t initial) : initial_(initial == 0 ? 1 : initial), items_(0) { +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYShash +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYShashIterator; + +template > class SYShash: public SYShashBase, public A { + + friend class SYShashIterator ; + +public: + + // constructors + SYShash(const uint32_t initial); + SYShash(const SYShash& right); + + // destructor + ~SYShash(); + + // operations + void insert(const T& t); + T* insertAndReturn(const T& t); + bool remove(const T& t); + bool contains(const T& t) const; + void clear(); + bool find(const T& t, T& r) const; + T* find(const T& t) const; + + // copy + SYShash& operator =(const SYShash& right); + + // equality + bool operator ==(const SYShash& right) const; +}; + +template inline SYShash::SYShash(const uint32_t initial) + : SYShashBase(initial) { +} + +template inline void SYShash::clear() { + for (uint32_t bucket = 0; bucket < this->vector_.length(); bucket++) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + SYShlink* next = sl->getNext(); + release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline SYShash::~SYShash() { + clear(); +} + +template inline T* SYShash::insertAndReturn(const T& t) { + const uint32_t buckets = this->initialize(); + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = acquire(t, h, this->vector_[bucket]); + this->vector_[bucket] = sl; + this->items_++; + if (this->items_ == this->vector_.length()) { + this->extend(); + } + return static_cast(&sl->getObject()); +} + +template inline void SYShash::insert(const T& t) { + insertAndReturn(t); +} + +template inline bool SYShash::contains(const T& t) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline bool SYShash::remove(const T& t) { + 
const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = this->vector_[bucket]; + SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + if (psl == 0) { + this->vector_[bucket] = sl->getNext(); + } else { + psl->setNext(sl->getNext()); + } + release(sl); + this->items_--; + return true; + } + psl = sl; + sl = sl->getNext(); + } + return false; +} + +template inline bool SYShash::find(const T& t, T& r) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + const SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + r = sl->getObject(); + return true; + } + psl = sl; + sl = sl->getNext(); + } + return false; +} + +template inline T* SYShash::find(const T& t) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + const SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + return const_cast(&sl->getObject()); + } + psl = sl; + sl = sl->getNext(); + } + return 0; +} + +template inline bool SYShash::operator ==(const SYShash& right) const { + if (this->entries() != right.entries()) { + return false; + } + for (uint32_t bucket = 0; bucket < this->vector_.length(); bucket++) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (!right.contains(sl->getObject())) { + return false; + } + sl = sl->getNext(); + } + } + return true; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYShashIterator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template > class SYShashIterator { +public: + + // constructors + SYShashIterator(SYShash& hash); + SYShashIterator(const SYShash& hash); + + // operators + bool operator ++(); + bool operator ()(); + + // operations + void reset(); + const T& key() const; + T& key(); + +private: + + // copy, assignment and equality are forbidden + SYShashIterator(const SYShashIterator& right); + SYShashIterator& operator =(const SYShashIterator& right); + bool operator ==(const SYShashIterator& right) const; + +protected: + + SYShash& hash_; + uint32_t bucket_; + SYShlink* sl_; +}; + +template inline void SYShashIterator::reset() { + bucket_ = SYS_NPOS; + sl_ = 0; +} + +template inline SYShashIterator::SYShashIterator(SYShash& hash) : hash_(hash) { + reset(); +} + +template inline SYShashIterator::SYShashIterator(const SYShash& hash) : hash_((SYShash&)hash) { + reset(); +} + +template inline bool SYShashIterator::operator ++() { + if (sl_ != 0) { + sl_ = sl_->getNext(); + } + while (sl_ == 0) { + bucket_++; // wrapping + if (bucket_ >= hash_.vector_.length()) { + return false; + } + sl_ = hash_.vector_[bucket_]; + } + return true; +} + +template inline bool SYShashIterator::operator ()() { + return ++(*this); +} + +template inline const T& SYShashIterator::key() const { + return sl_->getObject(); +} + +template inline T& SYShashIterator::key() { + return sl_->getObject(); +} + +// copy operator/constructor for SYShash: need iterator 
+template inline SYShash& SYShash::operator =( + const SYShash& right) { + if (this == &right) { + return *this; + } + clear(); + SYShashIterator iterator(right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template inline SYShash::SYShash(const SYShash& right) + : SYShashBase(right.initial_) { + *this = right; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYSpHash +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class SYSpHashIterator; + +template > class SYSpHash: public SYShashBase, public A { + + friend class SYSpHashIterator ; + +public: + + // constructors + SYSpHash(const uint32_t initial); + SYSpHash(const SYSpHash& right); + + // destructor + ~SYSpHash(); + + // operations + void insert(T* t); + T* remove(const T* t); + bool contains(const T* t) const; + void clear(); + void clearAndDestroy(); + T* find(const T* t) const; + + // copy + SYSpHash& operator = (const SYSpHash& right); + + // equality + bool operator ==(const SYSpHash& right) const; +}; + +template inline SYSpHash::SYSpHash(const uint32_t initial) : SYShashBase(initial) { +} + +template inline void SYSpHash::clear() { + uint32_t buckets = this->vector_.length(); + uint32_t bucket; + for (bucket = 0; bucket < buckets; bucket++) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + SYShlink* next = sl->getNext(); + release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline void SYSpHash::clearAndDestroy() { + uint32_t buckets = this->vector_.length(); + uint32_t bucket; + for (bucket = 0; bucket < buckets; bucket++) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + SYShlink* next = sl->getNext(); + delete sl->getObject(); + release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline SYSpHash::~SYSpHash() { + clear(); +} + +template inline void SYSpHash::insert(T* t) { + uint32_t buckets = this->initialize(); + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + this->vector_[bucket] = acquire(t, h, this->vector_[bucket]); + this->items_++; + if (this->items_ == this->vector_.length()) { + this->extend(); + } +} + +template inline bool SYSpHash::contains(const T* t) const { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline T* SYSpHash::remove(const T* t) { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = this->vector_[bucket]; + SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + if (psl == 0) { + this->vector_[bucket] = sl->getNext(); + } else { + psl->setNext(sl->getNext()); + } + T* result = sl->getObject(); + release(sl); + this->items_--; + return result; + } + psl = sl; + sl = sl->getNext(); + } + return 0; +} + +template inline T* SYSpHash::find(const T* t) const { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* 
sl = this->vector_[bucket]; + const SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + return sl->getObject(); + } + psl = sl; + sl = sl->getNext(); + } + return 0; +} + +template inline bool SYSpHash::operator ==(const SYSpHash& right) const { + if (this->entries() != right.entries()) { + return false; + } + for (uint32_t bucket = 0; bucket < this->vector_.length(); bucket++) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (!right.contains(*sl->getObject())) { + return false; + } + sl = sl->getNext(); + } + } + return true; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SYSpHashIterator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template > class SYSpHashIterator { +public: + + // constructor + SYSpHashIterator(SYSpHash& hash); + SYSpHashIterator(const SYSpHash& hash); + + // operators + T* operator ++(); + T* operator ()(); + + // operations + void reset(); + const T* key() const; + T* key(); + +private: + + // copy, assignment and equality are forbidden + SYSpHashIterator(const SYSpHashIterator& right); + SYSpHashIterator& operator =(const SYSpHashIterator& right); + bool operator ==(const SYSpHashIterator& right) const; + +protected: + + SYSpHash& hash_; + uint32_t bucket_; + SYShlink* sl_; +}; + +template inline void SYSpHashIterator::reset() { + bucket_ = SYS_NPOS; + sl_ = 0; +} + +template inline SYSpHashIterator::SYSpHashIterator(SYSpHash& hash) : hash_(hash) { + reset(); +} + +template inline SYSpHashIterator::SYSpHashIterator(const SYSpHash& hash) : hash_((SYSpHash&)hash) { + reset(); +} + +template inline T* SYSpHashIterator::operator ++() { + if (sl_ != 0) { + sl_ = sl_->getNext(); + } + while (sl_ == 0) { + bucket_++; // wrapping + if (bucket_ >= hash_.vector_.length()) { + return 0; + } + sl_ = hash_.vector_[bucket_]; + } + return sl_->getObject(); +} + +template inline T* SYSpHashIterator::operator ()() { + return ++(*this); +} + +template inline const T* SYSpHashIterator::key() const { + return sl_->getObject(); +} + +template inline T* SYSpHashIterator::key() { + return sl_->getObject(); +} + +// copy operator/constructor for SYShash: need iterator +template inline SYSpHash& SYSpHash::operator =(const SYSpHash& right) { + if (this == &right) { + return *this; + } + clear(); + SYSpHashIterator iterator(right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template inline SYSpHash::SYSpHash(const SYSpHash& right) : SYShashBase(right.initial_) { + *this = right; +} + +} + +#endif /* #ifndef _spw_api_hash_h_ */ diff --git a/storage/sparrow/api/include/connection.h b/storage/sparrow/api/include/connection.h new file mode 100644 index 000000000000..426be4a7eece --- /dev/null +++ b/storage/sparrow/api/include/connection.h @@ -0,0 +1,163 @@ +#ifndef _spw_api_connection_h +#define _spw_api_connection_h + +#include "global.h" +#include "master.h" +#include "table.h" +#include "sparrowbuffer.h" + +#include + +/* + Exceptions are not used to remove any potential problems if the client application does not use the same + compiler as the API. + + Most object methods return a error code: 0 if success, -1 if an error occurred except if specified differently. + Additional error information can be retrieved using the const char* errmsg() function. + + Methods that return pointers to objects return NULL if the object allocation failed. 
Client application + is responsible for calling delete on the pointer when the object is not necessary anymore. + + Tested against current MySQL version 5.5.7. +*/ + +namespace Sparrow +{ + +inline bool networkErr(int32_t errCode) { + return (errCode == SPW_API_SOCKET_CONN_CLOSED || + errCode == SPW_API_SOCKET_READ_ERR || + errCode == SPW_API_SOCKET_WRITE_ERR ); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ConnectionProperties +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define DEF_HOSTNAME "localhost" +#define DEF_PORT 11000 +#define DEF_USERNAME "admin" +#define DEF_PASSWORD "admin" +//#define DEF_SOURCE_ADDR "0.0.0.0" +#define DEF_SOURCE_ADDR "" +#define DEF_SOURCE_PORT 0 +#define DEF_MYSQL_PORT 3306 + +/* Declare a class that inherits from the ConnectionProperties interface. Then an object of that + class can be used to give connection settings to the Connection object through the + setProperties() method. +*/ + +class ConnectionProperties { + +protected: + virtual ~ConnectionProperties(){} + +public: + ConnectionProperties() {} + + // Hostname of the MySQL/Sparrow server. Can be either the name of the host or its IP address. + // Null or empty string means localhost + virtual const char* getHost() const = 0; + + // User cannot be empty. Password can. + virtual const char* getUser() const = 0; + virtual const char* getPsswd() const = 0; + + // Source address in IP format + virtual const char* getSrcAddr() const = 0; + + virtual uint32_t getSrcPort() const = 0; + + // Port used when connecting to Sparrow. + virtual uint32_t getPort() const = 0; + + // Port used when connecting to MySQL. + virtual uint32_t getMySQLPort() const = 0; +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Connection +{ +public: + enum PurgeMode { + PURGE_MODE_ON_INSERTION, + PURGE_MODE_CONSTANTLY + }; + +public: + virtual ~Connection() {} + + // Set connection properties. Must be called before connect(). Can be called again with different + // values to connect to another Sparrow engine. + virtual int setProperties(const ConnectionProperties& properties, uint32_t compressionAlgo=0) = 0; + virtual int setProperties(const char* host, const char* user, const char* psswd, + uint32_t mysqlPort=DEF_MYSQL_PORT, uint32_t spwPrt=DEF_PORT, const char* srcAddr=DEF_SOURCE_ADDR, + uint32_t srcPort=0, uint32_t compressionAlgo=0) = 0; + virtual const ConnectionProperties* getProperties() const = 0; + + virtual int connect() = 0; + virtual void disconnect() = 0; + + // Returns true if the connection to the Sparrow engine is established + virtual bool isClosed() const = 0; + + // Creates an empty Table object + virtual Table* createTable() const = 0; + virtual Table* getTable(const char* database, const char* table) = 0; + virtual void releaseTable(const Table*) const = 0; + + // Creates a SparrowBuffer object for the given Table. 
+ virtual SparrowBuffer* createBuffer(const Table* table, uint32_t capacity=UINT_MAX) const = 0; + virtual void releaseBuffer(const SparrowBuffer*) const = 0; + + virtual ColumnNames* createColumnNames(int size=0) = 0; + virtual void releaseColumnNames(const ColumnNames*) = 0; + virtual int insertData(const Table* table, const SparrowBuffer* buffers) = 0; + virtual int insertData(const Table* table, const ColumnNames* columns, const SparrowBuffer* buffers) = 0; + + // Disables the coalescing globally (= for all databases). After timeout seconds the coalescing switches back on automatically + virtual int disableCoalescing(uint32_t timeout, bool wait=false) = 0; + + // Disables the coalescing for a specific database + virtual int disableCoalescing(uint32_t timeout, const char* database, bool wait=false) = 0; + virtual int removePartitions(const char* database, const char* table, const uint64_t start, const uint64_t end) = 0; + virtual int switchPurgeMode(uint32_t timeout, const char* database, PurgeMode mode) = 0; + + // Returns NULL if failed. + virtual Master* getMasterFile(const char* database, const char* table) = 0; + virtual void releaseMasterFile(const Master*) const = 0; + + virtual unsigned int getListeningThrdId() const = 0; +}; + + +extern "C" +{ + // Must be the first API called before any other. It initializes internal static variables + // MySQL C library and some network resources. It is not thread-safe so it should be called + // before any client thread is created + SPW_API_PUBLIC_FUNC int initialize(); + + // Creates the API root object. Client application is responsible for calling delete on the + // resulting pointer when the object is no longer necessary. + SPW_API_PUBLIC_FUNC Connection* createConnect(); + + // Frees memory used by a Connection object. Client application should not call delete on the + // Connection*, but this method instead. + SPW_API_PUBLIC_FUNC void releaseConnect(const Connection*); + + // Returns the last error message. + SPW_API_PUBLIC_FUNC const char* errmsg(); +} + +} // namespace Sparrow + + + +#endif // #ifndef _spw_api_connection_h diff --git a/storage/sparrow/api/include/exception.h b/storage/sparrow/api/include/exception.h new file mode 100644 index 000000000000..eb53da8d6bb8 --- /dev/null +++ b/storage/sparrow/api/include/exception.h @@ -0,0 +1,135 @@ +/* + Sparrow exception. +*/ + +#ifndef _sparrow_api_exception_h_ +#define _sparrow_api_exception_h_ + +#include "global.h" + +#ifndef _WIN32 +#include +#endif +#include +#include +#include + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN32 +// Disable warning regarding exception specification. +#pragma warning(disable:4290 4996) +#endif + +/* + Disable MY_ATTRIBUTE for Visual Studio. 
+*/ +#ifndef MY_ATTRIBUTE +#if defined(__GNUC__) || defined(__clang__) +#define MY_ATTRIBUTE(A) __attribute__(A) +#else +#define MY_ATTRIBUTE(A) +#endif +#endif + +#ifdef __GNUG__ + #if __GNUC__ >= 8 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#elif defined(_MSC_VER) + #if _MSC_VER >= 1800 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#else + #define _THROW_(a) throw(a) +#endif + + +#define SPW_EXCEPT_MAXLENGTH 2048 + +class SparrowException { +private: + + bool logged_; + int32_t errcode_; + char buffer_[SPW_EXCEPT_MAXLENGTH]; + +public: + + SparrowException() : logged_(false), errcode_(SPW_API_FAILED) { + buffer_[0] = '\0'; + } + SparrowException(const char* text, const bool logged = true, int32_t err_code = SPW_API_FAILED); + static SparrowException create(const bool addError, int32_t err_code, const char* format, ...) MY_ATTRIBUTE((format(printf, 3, 4))); + SparrowException& operator = (const SparrowException& right) = default; + bool isLogged() const { + return logged_; + } + const char* getText() const { + return buffer_; + } + int32_t getErrcode() const { return errcode_; } + void toLog() const; + +}; + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + +inline SparrowException::SparrowException(const char* text, const bool logged /* = true */, int32_t err_code /* = SPW_API_FAILED */) +: logged_(logged), errcode_(err_code) { + strncpy(buffer_, text, sizeof(buffer_) - 1); + buffer_[sizeof(buffer_) - 1] = '\0'; +} + +// STATIC +inline SparrowException SparrowException::create( const bool addError, int32_t err_code, const char* format, ... ) { + char buffer[SPW_EXCEPT_MAXLENGTH]; + va_list varargs; + va_start(varargs, format); + vsnprintf(buffer, sizeof(buffer), format, varargs); + va_end(varargs); + if (addError) { + char error[SPW_EXCEPT_MAXLENGTH]; +#ifdef _WIN32 + LPSTR serror = error; + if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), serror, sizeof(error), 0) == 0) { + snprintf(error, sizeof(error), "error %d", GetLastError()); + } else { // Windows adds a nasty new line char... + size_t l = strlen(error) - 1; + while (error[l] == '\n' || error[l] == '\r') { + error[l--] = 0; + } + } +#else + snprintf(error, sizeof(error), "%s", strerror(errno)); +#endif + char result[SPW_EXCEPT_MAXLENGTH]; + snprintf(result, sizeof(result), "%s (%s)", buffer, error); + return SparrowException(result, true, err_code); + } else { + return SparrowException(buffer, true, err_code); + } +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +inline void SparrowException::toLog() const { + if (logged_) { + fprintf(stderr, "Sparrow: %s", getText()); + } +} + +#endif /* #ifndef _sparrow_api_exception_h_ */ diff --git a/storage/sparrow/api/include/exceptwrapper.h b/storage/sparrow/api/include/exceptwrapper.h new file mode 100644 index 000000000000..4ce87d332d1d --- /dev/null +++ b/storage/sparrow/api/include/exceptwrapper.h @@ -0,0 +1,331 @@ +/* + Wrappers classes that transform return codes into exceptions. 
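+
+  A minimal usage sketch (conn, table and row are assumed to already exist; only the
+  wrapper calls come from this header):
+
+    SparrowBufferEx buffer(conn->createBuffer(table));
+    try {
+      if (!buffer.addRow(row)) {
+        // false means SPW_API_BUFFER_FULL rather than an error: flush, clear and retry.
+        ConnectionEx(conn).insertData(table, buffer.get());
+        buffer->clear();
+        buffer.addRow(row);
+      }
+    } catch (const SparrowException& e) {
+      fprintf(stderr, "Sparrow error %d: %s\n", e.getErrcode(), e.getText());
+    }
+    conn->releaseBuffer(buffer.get());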
+*/ + +#ifndef _sparrow_api_exceptwrapper_h_ +#define _sparrow_api_exceptwrapper_h_ + +#include "global.h" +#include "sparrowbuffer.h" +#include "exception.h" +#include "connection.h" + +#ifdef SPARROW_API_EXPORTS +//#define SPW_EXCPTMSG Sparrow::spwerror.getText() +#define SPW_EXCPTMSG Sparrow::errmsg() +#else +#define SPW_EXCPTMSG Sparrow::errmsg() +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowBufferEx - SparrowBuffer with exceptions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Encapsulates above class methods and transforms error codes into exceptions. +class SparrowBufferEx +{ +private: + SparrowBuffer* buffer_; + +public: + SparrowBufferEx(SparrowBuffer* buffer) : buffer_(buffer) + {;} + + operator SparrowBuffer* () { + return buffer_; + } + + SparrowBuffer* operator -> () { return buffer_; } + SparrowBuffer* get() { return buffer_; } + + + // Called by the client application's implementation of SparrowRow::decode() + // All methods can throw a SparrowException + void addNull(int column) _THROW_(SparrowException); + void addBool(int column, bool value) _THROW_(SparrowException); + void addByte(int column, uint8_t value) _THROW_(SparrowException); + void addShort(int column, uint16_t value) _THROW_(SparrowException); + void addInt(int column, uint32_t value) _THROW_(SparrowException); + void addLong(int column, uint64_t value) _THROW_(SparrowException); + void addDouble(int column, double value) _THROW_(SparrowException); + void addString(int column, const char* value) _THROW_(SparrowException); + void addBlob(int column, const uint8_t* value, uint32_t length) _THROW_(SparrowException); + + // Called by the client application + bool addRow(const SparrowRow& row, void* dummy=NULL) _THROW_(SparrowException); +}; + +inline void SparrowBufferEx::addNull( int column ) _THROW_(SparrowException) { + int res = buffer_->addNull( column ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addBool( int column, bool value ) _THROW_(SparrowException) { + int res = buffer_->addBool( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addByte( int column, uint8_t value ) _THROW_(SparrowException) { + int res = buffer_->addByte( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addShort( int column, uint16_t value ) _THROW_(SparrowException) { + int res = buffer_->addShort( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addInt( int column, uint32_t value ) _THROW_(SparrowException) { + int res = buffer_->addInt( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addLong( int column, uint64_t value ) _THROW_(SparrowException) { + int res = buffer_->addLong( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addDouble( int column, double value ) _THROW_(SparrowException) { + int res = buffer_->addDouble( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addString( int column, const char* value ) 
_THROW_(SparrowException) { + int res = buffer_->addString( column, value ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline void SparrowBufferEx::addBlob( int column, const uint8_t* value, uint32_t length ) _THROW_(SparrowException) { + int res = buffer_->addBlob( column, value, length ); + if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } +} + +inline bool SparrowBufferEx::addRow( const SparrowRow& row, void* dummy ) _THROW_(SparrowException) { + int res = buffer_->addRow( row, dummy ); + if ( res == SPW_API_BUFFER_FULL ) { + return false; + } + else if ( res != 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return true; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TableEx - Table with exceptions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TableEx +{ +private: + Table* table_; + +public: + TableEx(Table* table) : table_(table) + {;} + + operator Table* () { + return table_; + } + + Table* operator -> () { return table_; } + Table* get() { return table_; } + + + // Methods for specifying the table + void setDatabaseName(const char*); + void setTableName(const char*); + void setMaxLifetime(uint64_t); + void setCoalescPeriod(uint64_t); + void setAggregPeriod(uint32_t); + + const char* getDatabaseName() const; + const char* getTableName() const; + uint64_t getMaxLifetime() const; + uint64_t getCoalescPeriod() const; + uint32_t getAggregPeriod() const; + + int appendColumn(const char* name, uint32_t index, ColumnType type, uint32_t stringSize=0, + uint32_t flags=0, uint32_t info=0, const char* charset=DEF_CHARSET) _THROW_(SparrowException); + uint32_t getNbColumns() const; + const Column& getColumn(uint32_t index); + + int appendIndex(const char* name, uint32_t colIndex, bool unique) _THROW_(SparrowException); + + int addColToIndex(uint32_t indexId, uint32_t colIndex) _THROW_(SparrowException); + + int appendFK(const char* name, uint32_t colIndex, const char* databaseName, const char* tableName, + const char* columnName) _THROW_(SparrowException); + + int addDnsEntry(uint32_t dnsEntry) _THROW_(SparrowException); + int addDnsServer(uint32_t entryIndex, const char* name, uint32_t port, const char* sourcAddr, uint32_t sourcePort) _THROW_(SparrowException); + + // Creates the table and/or database schema if they don't exist. Updates the table if it exists. 
+ int create(Connection* connection) _THROW_(SparrowException); +}; + + + +inline void TableEx::setDatabaseName( const char* name ) { + table_->setDatabaseName( name ); +} + +inline void TableEx::setTableName( const char* name ) { + table_->setTableName( name ); +} + +inline void TableEx::setMaxLifetime( uint64_t value ) { + table_->setMaxLifetime( value ); +} + +inline void TableEx::setCoalescPeriod( uint64_t value ) { + table_->setCoalescPeriod( value ); +} + +inline void TableEx::setAggregPeriod( uint32_t value ) { + table_->setAggregPeriod( value ); +} + +inline const char* TableEx::getDatabaseName() const { + return table_->getDatabaseName(); +} + +inline const char* TableEx::getTableName() const { + return table_->getTableName(); +} + +inline uint64_t TableEx::getMaxLifetime() const { + return table_->getMaxLifetime(); +} + +inline uint64_t TableEx::getCoalescPeriod() const { + return table_->getCoalescPeriod(); +} + +inline uint32_t TableEx::getAggregPeriod() const { + return table_->getAggregPeriod(); +} + +inline int TableEx::appendColumn(const char* name, uint32_t index, ColumnType type, uint32_t stringSize, + uint32_t flags, uint32_t info, const char* charset) _THROW_(SparrowException) +{ + int res = table_->appendColumn( name, index, type, stringSize, flags, info, charset ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + +inline uint32_t TableEx::getNbColumns() const { + return table_->getNbColumns(); +} + +inline const Column& TableEx::getColumn( uint32_t index ) { + return table_->getColumn( index ); +} + +inline int TableEx::appendIndex( const char* name, uint32_t colIndex, bool unique ) _THROW_(SparrowException) +{ + int res = table_->appendIndex( name, colIndex, unique ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + +inline int TableEx::appendFK( const char* name, uint32_t colIndex, const char* databaseName, const char* tableName, + const char* columnName ) _THROW_(SparrowException) +{ + int res = table_->appendFK( name, colIndex, databaseName, tableName, columnName ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + +inline int TableEx::addDnsEntry(uint32_t dnsEntry) _THROW_(SparrowException) +{ + int res = table_->addDnsEntry( dnsEntry ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + +inline int TableEx::addDnsServer(uint32_t entryIndex, const char* name, uint32_t port, const char* sourcAddr, uint32_t sourcePort) _THROW_(SparrowException) +{ + int res = table_->addDnsServer( entryIndex, name, port, sourcAddr, sourcePort ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + +inline int TableEx::create(Connection* connection) _THROW_(SparrowException) +{ + int res = table_->create( connection ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ConnectionEx - Connection with exceptions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ConnectionEx +{ +private: + Connection* conn_; + +public: + ConnectionEx(Connection* conn) : conn_(conn) + {;} + + operator Connection* () { + return conn_; + } + + Connection* operator -> () { return conn_; } + Connection* get() { return conn_; } + + int insertData(const Table* table, 
const SparrowBuffer* buffers); + +}; + +inline int ConnectionEx::insertData(const Table* table, const SparrowBuffer* buffers) { + int res = conn_->insertData( table, buffers ); + if ( res < 0 ) { + throw SparrowException( SPW_EXCPTMSG, false, res ); + } + return res; +} + + +} // namespace Sparrow + +#endif /* #ifndef _sparrow_api_exceptwrapper_h_ */ diff --git a/storage/sparrow/api/include/global.h b/storage/sparrow/api/include/global.h new file mode 100644 index 000000000000..67217f861539 --- /dev/null +++ b/storage/sparrow/api/include/global.h @@ -0,0 +1,43 @@ +#ifndef _spw_api_global_h +#define _spw_api_global_h + +#include +#include + +// Some error codes +#define SPW_API_OK 0 +#define SPW_API_FAILED -1 +#define SPW_API_BUFFER_FULL -2 +#define SPW_API_OUT_OF_MEMORY -3 +#define SPW_API_COL_NOT_NULLABLE -4 +#define SPW_API_COLINDX_OOB -5 // Column index is out of bounds +#define SPW_API_INCOMPATIBLE_TYPES -6 // Type of C data is not compatible with type of column +#define SPW_API_SOCKET_CONN_CLOSED -7 +#define SPW_API_SOCKET_READ_ERR -8 +#define SPW_API_SOCKET_WRITE_ERR -9 +#define SPW_API_INVALID_ARG -10 + + + +#ifndef SPW_API_PUBLIC_FUNC + +#if defined(_WIN32) +#ifdef SPARROW_API_EXPORTS +#define SPW_API_PUBLIC_FUNC __declspec(dllexport) +#else +// this is for static build +#ifdef SPW_API_LIB_BUILD +#define SPW_API_PUBLIC_FUNC +#else +// this is for clients using dynamic lib +#define SPW_API_PUBLIC_FUNC __declspec(dllimport) +#endif +#endif +#else +#define SPW_API_PUBLIC_FUNC +#endif + +#endif //#ifndef CPPCONN_PUBLIC_FUNC + + +#endif /* _spw_api_global_h */ diff --git a/storage/sparrow/api/include/master.h b/storage/sparrow/api/include/master.h new file mode 100644 index 000000000000..2eef8cff3062 --- /dev/null +++ b/storage/sparrow/api/include/master.h @@ -0,0 +1,60 @@ +#ifndef _spw_api_master_h_ +#define _spw_api_master_h_ + +#include "global.h" +#include "types.h" + +namespace Sparrow +{ + +class Master +{ +public: + virtual ~Master() {} + + virtual uint64_t getSize() const = 0; + virtual uint32_t getVersion() const = 0; + virtual uint64_t getMaxLifetime() const = 0; + virtual uint32_t getAggregPeriod() const = 0; + virtual uint64_t getCoalescingPeriod() const = 0; + virtual uint64_t getDefaultWhere() const = 0; + virtual uint64_t getStringOptimization() const = 0; + virtual uint64_t getSerial() const = 0; + virtual uint64_t getTimeCreated() const = 0; + virtual uint64_t getTimeUpdated() const = 0; + virtual uint64_t getDataSize() const = 0; + virtual uint64_t getIndexSize() const = 0; + virtual uint64_t getRecords() const = 0; + virtual uint32_t getIndexAlterSerial() const = 0; + + virtual uint32_t getNbColumns() const = 0; + virtual const Column& getColumn(uint32_t index) const = 0; + + virtual uint32_t getNbIndexes() const = 0; + virtual const Index& getIndex(uint32_t index) const = 0; + + virtual uint32_t getNbActiveIndexes() const = 0; + virtual uint32_t getActiveIndexes(uint32_t index) const = 0; + + virtual uint32_t getNbIndexMappings() const = 0; + virtual uint32_t getIndexMapping(uint32_t index) const = 0; + + virtual uint32_t getNbFK() const = 0; + virtual const ForeignKey& getFK(uint32_t index) const = 0; + + virtual uint32_t getNbDnsEntries() const = 0; + virtual uint32_t getDnsEntry(uint32_t index) const = 0; + virtual uint32_t getNbDnsServers(uint32_t index) const = 0; + virtual const DnsServer& getDnsServer(uint32_t index, uint32_t index2) const = 0; + + virtual uint32_t getNbIndexAlterations() const = 0; + virtual const Alteration& getIndexAlteration(uint32_t 
index) const = 0; + + virtual uint32_t getNbPartitions() const = 0; + virtual const Partition& getPartition(uint32_t index) const = 0; + +}; + +} // namespace Sparrow + +#endif // #define _spw_api_master_h_ diff --git a/storage/sparrow/api/include/old/spw_global.h b/storage/sparrow/api/include/old/spw_global.h new file mode 100644 index 000000000000..33f8ea06a6a4 --- /dev/null +++ b/storage/sparrow/api/include/old/spw_global.h @@ -0,0 +1,82 @@ +#ifndef _spw_api_spw_global_h +#define _spw_api_spw_global_h + +/* Typdefs for easier portability */ +// Obsolete definitions. Should not be used anymore. Use standard definitins instead. +#if 0 + + +typedef unsigned char uchar; /* Short for unsigned char */ + +#ifndef int8_t +typedef signed char int8_t; /* Signed integer >= 8 bits */ +#endif +#ifndef uint8_t +typedef unsigned char uint8_t; /* Unsigned integer >= 8 bits */ +#endif +#ifndef uint8_t +typedef short int16_t; +#endif +#ifndef int16_t +typedef unsigned short uint16_t; +#endif + +#ifndef int32_t +typedef int int32_t; +#endif +#ifndef uint32_t +typedef unsigned int uint32_t; +#endif + +#ifndef ulong +typedef unsigned long ulong; /* Short for unsigned long */ +#endif + +#ifndef int64_t +typedef long long int64_t; +#endif +#ifndef uint64_t +typedef unsigned long long uint64_t; +#endif + +#ifndef uint +typedef unsigned int uint; +#endif +#ifndef ushort +typedef unsigned short ushort; +#endif + + +/* First check for ANSI C99 definition: */ +#ifdef ULLONG_MAX +#undef ULLONG_MAX +#endif + +#ifdef ULLONG_MAX +#define ULLONG_MAX ULLONG_MAX +#else +#define ULLONG_MAX ((unsigned long long)(~0ULL)) +#endif + +#define INT_MIN64 (~0x7FFFFFFFFFFFFFFFLL) +#define INT_MAX64 0x7FFFFFFFFFFFFFFFLL +#define INT_MIN32 (~0x7FFFFFFFL) +#define INT_MAX32 0x7FFFFFFFL +#define UINT_MAX32 0xFFFFFFFFL +#define INT_MIN24 (~0x007FFFFF) +#define INT_MAX24 0x007FFFFF +#define UINT_MAX24 0x00FFFFFF +#define INT_MIN16 (~0x7FFF) +#define INT_MAX16 0x7FFF +#define UINT_MAX16 0xFFFF +#define INT_MIN8 (~0x7F) +#define INT_MAX8 0x7F +#define UINT_MAX8 0xFF + +#ifndef NULL +#define NULL 0 +#endif + +#endif + +#endif /* _spw_api_spw_global_h */ diff --git a/storage/sparrow/api/include/sparrowbuffer.h b/storage/sparrow/api/include/sparrowbuffer.h new file mode 100644 index 000000000000..284ac50bcd15 --- /dev/null +++ b/storage/sparrow/api/include/sparrowbuffer.h @@ -0,0 +1,76 @@ +#ifndef _spw_api_sparrowbuffer_h_ +#define _spw_api_sparrowbuffer_h_ + +#include "global.h" +#include "table.h" + +namespace Sparrow +{ + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowRow +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* All row data that are inserted through the Sparrow API must inherit from the SparrowRow interface + and implement the decode() method. That method uses the addXXX methods from SparrowBuffer to + format row data for insertion into Sparrow. There should be one addXXX call per column in the table, + in the same order as the column were created. 
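+
+  A minimal sketch of such a row type (the three-column layout used here, a BIGINT
+  timestamp, a VARCHAR source and an INT byte count, is an illustrative assumption):
+
+    class FlowRow : public SparrowRow {
+    public:
+      FlowRow(uint64_t ts, const char* src, uint32_t bytes) : ts_(ts), src_(src), bytes_(bytes) {}
+
+      // One addXXX call per column, in the order the columns were appended to the Table.
+      int decode(SparrowBuffer* buffer, void* /*dummy*/) const override {
+        int rc = buffer->addLong(0, ts_);               // column 0: BIGINT
+        if (rc == 0) rc = buffer->addString(1, src_);   // column 1: VARCHAR
+        if (rc == 0) rc = buffer->addInt(2, bytes_);    // column 2: INT
+        return rc;                                      // 0 on success, negative SPW_API_* code on error
+      }
+
+    private:
+      uint64_t ts_;
+      const char* src_;
+      uint32_t bytes_;
+    };
+
+    // buffer->addRow(FlowRow(now, "10.0.0.1", 512)) then invokes decode() above for that row.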
+*/ + +class SparrowBuffer; +class SparrowRow +{ +public: + virtual ~SparrowRow() {} + + // Use dummy to pass whatever optional client context object necessary for storing the + // row into the buffer + virtual int decode(SparrowBuffer* buffer, void* dummy) const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SparrowBuffer +{ +public: + virtual ~SparrowBuffer() {} + + // Called by the client application's implementation of SparrowRow::decode() + virtual int addNull(int column) = 0; + virtual int addBool(int column, bool value) = 0; + virtual int addByte(int column, uint8_t value) = 0; + virtual int addShort(int column, uint16_t value) = 0; + virtual int addInt(int column, uint32_t value) = 0; + virtual int addLong(int column, uint64_t value) = 0; + virtual int addDouble(int column, double value) = 0; + virtual int addString(int column, const char* value) = 0; + virtual int addBlob(int column, const uint8_t* value, uint32_t length) = 0; + + // Called by the client application + virtual int addRow(const SparrowRow& row, void* dummy=NULL) = 0; + + // Frees all buffers + virtual void clear() = 0; + + // Returns true if empty + virtual bool isEmpty() const = 0; + + // Returns size in bytes + virtual uint32_t getSize() const = 0; + + // Returns number of rows + virtual uint32_t getRows() const = 0; + + // Returns time of insertion of the first row + virtual uint64_t getTimestamp() const = 0; + + // Returns true if the data has been stored in buffer for more than 'delay' (in seconds) + virtual bool hasExpired(uint32_t delay) const = 0; +}; + + +} // namespace Sparrow + +#endif // #define _spw_api_sparrowbuffer_h_ diff --git a/storage/sparrow/api/include/table.h b/storage/sparrow/api/include/table.h new file mode 100644 index 000000000000..139c520206f1 --- /dev/null +++ b/storage/sparrow/api/include/table.h @@ -0,0 +1,59 @@ +#ifndef _spw_api_table_h +#define _spw_api_table_h + +#include "global.h" +#include "types.h" + + +namespace Sparrow +{ + +// Most methods returns 0 if success, -1 if error except if specified otherwise +// appendXXX() methods returns item index (>=0) if success, -1 if error +class Connection; +class Table +{ +public: + virtual ~Table() {} + + // Methods for specifying the table + virtual void setDatabaseName(const char*) = 0; + virtual void setTableName(const char*) = 0; + virtual void setMaxLifetime(uint64_t) = 0; + virtual void setCoalescPeriod(uint64_t) = 0; + virtual void setAggregPeriod(uint32_t) = 0; + virtual void setDefaultWhere(uint64_t) = 0; + virtual void setStringOptimization(uint64_t) = 0; + + virtual const char* getDatabaseName() const = 0; + virtual const char* getTableName() const = 0; + virtual uint64_t getMaxLifetime() const = 0; + virtual uint64_t getCoalescPeriod() const = 0; + virtual uint32_t getAggregPeriod() const = 0; + virtual uint64_t getDefaultWhere() const = 0; + virtual uint64_t getStringOptimization() const = 0; + + virtual int appendColumn(const char* name, uint32_t index, ColumnType type, uint32_t stringSize=0, + uint32_t flags=0, uint32_t info=0, const char* charset=DEF_CHARSET) = 0; + virtual uint32_t getNbColumns() const = 0; + virtual const Column& getColumn(uint32_t index) const = 0; + virtual Column& getColumn(uint32_t index) = 0; + + virtual int appendIndex(const char* name, uint32_t colIndex, bool unique) = 0; + + virtual int 
addColToIndex(uint32_t indexId, uint32_t colIndex) = 0; + + virtual int appendFK(const char* name, uint32_t colIndex, const char* databaseName, const char* tableName, + const char* columnName) = 0; + + virtual int addDnsEntry(uint32_t dnsEntry) = 0; + virtual int addDnsServer(uint32_t entryIndex, const char* name, uint32_t port, const char* sourcAddr, uint32_t sourcePort) = 0; + + // Creates the table and/or database schema if they don't exist. Updates the table if it exists. + virtual int create(Connection* connection) = 0; +}; + + +} // namespace Sparrow + +#endif // #define _spw_api_table_h diff --git a/storage/sparrow/api/include/types.h b/storage/sparrow/api/include/types.h new file mode 100644 index 000000000000..3419e0495d27 --- /dev/null +++ b/storage/sparrow/api/include/types.h @@ -0,0 +1,191 @@ +#ifndef _spw_api_types_h_ +#define _spw_api_types_h_ + +#include "global.h" + +namespace Sparrow { + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Column +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define DEF_CHARSET "utf8_bin" + +// Column type as described in the storage adapter. +enum ColumnType { + COL_BLOB, + COL_BYTE, + COL_DOUBLE, + COL_INT, + COL_LONG, + COL_STRING, + COL_TIMESTAMP, + COL_SHORT, + COL_UNKNOWN +}; + +inline const char* getType( ColumnType type ) { + switch ( type ) + { + case COL_BLOB: return "VARBINARY"; + case COL_BYTE: return "TINYINT"; + case COL_DOUBLE: return "DOUBLE"; + case COL_INT: return "INT"; + case COL_LONG: return "BIGINT"; + case COL_STRING: return "VARCHAR"; + case COL_TIMESTAMP: return "TIMESTAMP"; + case COL_SHORT: return "SMALLINT"; + case COL_UNKNOWN: return ""; + } + return ""; +} + + +/** + * Note on COL_IP_LOOKUP: + * This column contains STRING values set using reverse DNS + * lookups from another BLOB column containing IP + * addresses. When this flag is set, the column's info attribute gives + * the index of the source IP column. + */ + +enum ColumnFlags { + COL_NULLABLE = 1, // Column can contain NULLs. + COL_IP_ADDRESS = 2, // Column contains IP addresses. + COL_IP_LOOKUP = 4, // Column contains IP address lookups. Index of column referencing the Ip address is set in info_ + COL_DNS_IDENTIFIER = 8, // Column gives the DNS identifier. + COL_AUTO_INC = 16, // Column is auto incremental. + COL_UNSIGNED = 32 // Column values are unsigned. 
+}; + +class Column +{ +protected: + virtual ~Column() {} + +public: + virtual const char* getName() const = 0; + virtual ColumnType getType() const = 0; + virtual bool isString() const = 0; + virtual uint32_t getStringSize() const = 0; + virtual uint32_t getIndex() const = 0; + virtual uint32_t getFlags() const = 0; + virtual bool isFlagSet(const ColumnFlags flag) const = 0; + virtual void addFlag(const ColumnFlags flag) = 0; + virtual void removeFlag(const ColumnFlags flag) = 0; + virtual uint32_t getInfo() const = 0; + virtual void setInfo(const uint32_t info) = 0; + virtual const char* getCharset() const = 0; + virtual uint32_t getSerial() const = 0; + virtual uint32_t getDropSerial() const = 0; + virtual bool isDropped() const = 0; + virtual const char* getDefaultValue() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// List of columns names +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ColumnNames { +public: + virtual ~ColumnNames() {} + virtual uint32_t size() const = 0; + virtual uint32_t appendName(const char*) = 0; + virtual const char* getName(int index) const = 0; +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Index +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Index { +protected: + virtual ~Index() {} + +public: + virtual const char* getName() const = 0; + + // Returns the number of columns ids stored in colIds, or -1 if there is more than len + virtual uint32_t getColumnIds(uint32_t* colIds, uint32_t len) const = 0; + virtual bool isUnique() const = 0; + virtual bool isDropped() const = 0; +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Alteration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +enum AlterationType { + ALT_UNKNOWN, + ALT_ADD_INDEX, + ALT_DROP_INDEX +}; + + +class Alteration { +protected: + virtual ~Alteration() {} + +public: + + virtual AlterationType getType() const = 0; + virtual uint32_t getSerial() const = 0; + virtual uint32_t getId() const = 0; +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ForeignKey +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Describes a table foreign key. 
+class ForeignKey { +protected: + virtual ~ForeignKey() {} + +public: + virtual const char* getName() const = 0; + virtual uint32_t getColumnId() const = 0; + virtual const char* getDatabaseName() const = 0; + virtual const char* getTableName() const = 0; + virtual const char* getColumnName() const = 0; +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsServer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsServer { +protected: + virtual ~DnsServer() {} + +public: + + virtual const char* getHost() const = 0; + virtual uint32_t getPort() const = 0; + virtual const char* getSourceAddr() const = 0; + virtual uint32_t getSourcePort() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Partition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Partition +{ +protected: + virtual ~Partition() {} + +public: + virtual uint64_t getSerial() const = 0; + virtual uint32_t getFilesystem() const = 0; + virtual uint32_t getIndexAlterSerial() const = 0; +}; + +} + +#endif /* #ifndef _spw_api_types_h_ */ diff --git a/storage/sparrow/api/interval.h b/storage/sparrow/api/interval.h new file mode 100644 index 000000000000..5c493cdda7e0 --- /dev/null +++ b/storage/sparrow/api/interval.h @@ -0,0 +1,332 @@ +/* + Generic interval. +*/ + +#ifndef _engine_interval_h_ +#define _engine_interval_h_ + +#include "intervaltree.h" + +namespace Sparrow { + +template class Interval : public AbstractInterval { +private: + T lower_; + T upper_; + unsigned int lowerIncluded_:1; + unsigned int upperIncluded_:1; + unsigned int lowerSet_:1; + unsigned int upperSet_:1; + unsigned int pad_:28; + +private: + + // Creates a void (empty) interval. + // This constructor is private and takes a dummy parameter to distinguish from default constructor. + Interval(const bool foo) { + lowerSet_ = false; + upperSet_ = false; + lowerIncluded_ = true; + upperIncluded_ = true; + } + + // Checks whether the given interval info is valid. + static bool check(const T* lower, const bool lowerIncluded, const T* upper, const bool upperIncluded) { + if ((lower == 0 && lowerIncluded) || (upper == 0 && upperIncluded)) { + // Infinite bound cannot be included in interval. + return false; + } + if (lower != 0 && upper != 0) { + if (*lower > *upper) { + // Lower bound cannot be greater than upper bound. + return false; + } else if (!(*upper > *lower)) { + // Bounds are equal. + if (!lowerIncluded || !upperIncluded) { + // Single value with lower or upper bound excluded. + return false; + } + } + } + return true; + } + + // Compare two bounds. + static bool compareBounds(const T* bound1, const T* bound2) { + if (bound1 == 0) { + return bound2 == 0; + } else { + return bound2 != 0 && *bound1 == *bound2; + } + } + + // Compare two upper or lower bounds. 
+ static int compareBounds(const T* bound1, const T* bound2, const bool upper) { + if (bound1 == 0) { + if (bound2 == 0) { + return 0; + } else if (upper) { + return 1; + } else { + return -1; + } + } else if (bound2 != 0) { + if (*bound1 < *bound2) { + return -1; + } else if (*bound2 < *bound1) { + return 1; + } + return 0; + } else if (upper) { + return -1; + } else { + return 1; + } + } + +public: + + Interval() : lowerIncluded_(false), upperIncluded_(false), lowerSet_(false), upperSet_(false) { + } + Interval(const T bound) : lower_(bound), upper_(bound), lowerIncluded_(true), upperIncluded_(true), lowerSet_(true), upperSet_(true) { + } + Interval(const T lower, const T upper) : lower_(lower), upper_(upper), lowerIncluded_(true), upperIncluded_(true), lowerSet_(true), upperSet_(true) { + } + Interval(const T* lower, const T* upper, const bool lowerIncluded, const bool upperIncluded) : lowerIncluded_(lowerIncluded), upperIncluded_(upperIncluded), + lowerSet_(lower != 0), upperSet_(upper != 0) { + if (lower != 0) { + lower_ = *lower; + } + if (upper != 0) { + upper_ = *upper; + } + } + + // Gets lower bound, returns 0 if -infinite. + const T* getLow() const { + return lowerSet_ ? &lower_ : 0; + } + + // Gets upper bound, returns 0 if +infinite. + const T* getUp() const { + return upperSet_ ? &upper_ : 0; + } + + T getLength() const { + return *getUp() - *getLow(); + } + + bool isLowerIncluded() const { + return lowerIncluded_; + } + + bool isUpperIncluded() const { + return upperIncluded_; + } + + bool isVoid() const { + return lowerIncluded_ && upperIncluded_ && !lowerSet_ && !upperSet_; + } + + bool isAll() const { + return !lowerIncluded_ && !upperIncluded_ && !lowerSet_ && !upperSet_; + } + + bool isPoint() const { + return lowerSet_ && upperSet_ && lowerIncluded_ && upperIncluded_ && lower_ == upper_; + } + + bool contains(const Interval& interval) const { + if (interval.isVoid()) { + return true; + } + if (isVoid()) { + return false; + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + if (lowerCmp > 0 || (lowerCmp == 0 && !lowerIncluded_ && interval.lowerIncluded_)) { + return false; + } + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + if (upperCmp < 0 || (upperCmp == 0 && !upperIncluded_ && interval.upperIncluded_)) { + return false; + } + return true; + } + + bool contains(const T& value) const { + if (isVoid()) { + return false; + } + const int lowerCmp = compareBounds(getLow(), &value, false); + if (lowerCmp > 0 || (lowerCmp == 0 && !lowerIncluded_)) { + return false; + } + const int upperCmp = compareBounds(getUp(), &value, true); + if (upperCmp < 0 || (upperCmp == 0 && !upperIncluded_)) { + return false; + } + return true; + } + + bool isAdjacent(const Interval& interval) const { + if (compareBounds(getUp(), interval.getLow()) + && upperIncluded_ == !interval.lowerIncluded_) { + return true; + } + if (compareBounds(getLow(), interval.getUp()) + && lowerIncluded_ == !interval.upperIncluded_) { + return true; + } + return false; + } + + Interval makeIntersection(const Interval& interval) const { + if (isVoid() || interval.isVoid()) { + // One of the interval is void: return void. + return Interval(false); + } else if (isAll()) { + // One of the interval is all: return the other one. + return interval; + } else if (interval.isAll()) { + return *this; + } else if (isAdjacent(interval)) { + // Intervals are adjacent: return void. 
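+      // (e.g. [1,2) and [2,3] touch at 2 but share no point, so the result is empty)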
+ return Interval(false); + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const T* lowerBound = lowerCmp >= 0 ? getLow() : interval.getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? lowerIncluded_ : interval.lowerIncluded_; + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* upperBound = upperCmp >= 0 ? interval.getUp() : getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? interval.upperIncluded_ : upperIncluded_; + if (check(lowerBound, lowerBoundIncluded, upperBound, upperBoundIncluded)) { + return Interval(lowerBound, upperBound, lowerBoundIncluded, upperBoundIncluded); + } else { + // No intersection: return void interval. + return Interval(false); + } + } + + bool intersects(const Interval& interval) const { + if (isVoid() || interval.isVoid()) { + return false; + } else if (isAll() || interval.isAll()) { + return true; + } else if (isAdjacent(interval)) { + return false; + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const T* lowerBound = lowerCmp >= 0 ? getLow() : interval.getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? lowerIncluded_ : interval.lowerIncluded_; + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* upperBound = upperCmp >= 0 ? interval.getUp() : getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? interval.upperIncluded_ : upperIncluded_; + return check(lowerBound, lowerBoundIncluded, upperBound, upperBoundIncluded); + } + + Interval makeUnion(const Interval& interval) const { + // One of the interval is void: return the other one. + if (isVoid()) { + return interval; + } + if (interval.isVoid()) { + return *this; + } + + // One of the interval is all: return all. + if (isAll()) { + return *this; + } + if (interval.isAll()) { + return interval; + } + + // Cannot merge intervals that do not intersect. + if (!intersects(interval) && !isAdjacent(interval)) { + return Interval(false); + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* lowerBound = lowerCmp >= 0 ? interval.getLow() : getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? interval.lowerIncluded_ : lowerIncluded_; + const T* upperBound = upperCmp >= 0 ? getUp() : interval.getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? upperIncluded_ : interval.upperIncluded_; + return Interval(lowerBound, upperBound, lowerBoundIncluded, upperBoundIncluded); + } + + // Returns one or two intervals adjacent to this interval so the union of + // all those intervals represent all values. + // There are two result intervals if and only if !result[1].isVoid(). + void makeNot(Interval* result) const { + if (isAll()) { + result[0] = Interval(false); + result[1] = Interval(false); + } else if (isVoid()) { + result[0] = Interval(); + result[1] = Interval(false); + } else { + const T* lowerBound = getLow(); + const T* upperBound = getUp(); + int index = 0; + if (lowerBound == 0 || upperBound != 0) { + result[index++] = Interval(upperBound, 0, !isUpperIncluded(), false); + } + if (lowerBound != 0 || upperBound == 0) { + result[index] = Interval(0, lowerBound, false, !isLowerIncluded()); + } + } + } + + // For sorting, use the lower bound only. 
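+  // For example, [1, 10] sorts before [2, 3] because 1 < 2; with equal lower bounds,
+  // an included bound sorts before an excluded one, so [1, 5] comes before (1, 5].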
+ bool operator < (const Interval& interval) const { + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + if (lowerCmp != 0) { + return lowerCmp < 0; + } + if (lowerIncluded_) { + return !interval.lowerIncluded_; + } else { + return false; + } + } + + bool operator == (const Interval& interval) const { + return lower_ == interval.lower_ && upper_ == interval.upper_ + && lowerIncluded_ == interval.lowerIncluded_ && upperIncluded_ == interval.upperIncluded_ + && lowerSet_ == interval.lowerSet_ && upperSet_ == interval.upperSet_; + } + + // Implementation of AbstractInterval. + T getMin() const override { + const T* low = getLow(); + if (low == 0) { + return AbstractInterval::getSmallest(); + } else { + return *low; + } + } + + T getMax() const override { + const T* up = getUp(); + if (up == 0) { + return AbstractInterval::getLargest(); + } else { + return *up; + } + } + + int compareTo(const AbstractInterval& right) const override { + if (getMin() == right.getMin()) { + return 0; + } else if (getMin() < right.getMin()) { + return -1; + } else { + return 1; + } + } +}; + +} + +#endif /* #ifndef _engine_interval_h_ */ diff --git a/storage/sparrow/api/intervaltree.h b/storage/sparrow/api/intervaltree.h new file mode 100644 index 000000000000..6d74ce2941db --- /dev/null +++ b/storage/sparrow/api/intervaltree.h @@ -0,0 +1,599 @@ +/* + AbstractInterval tree + */ + +#ifndef _engine_intervaltree_h_ +#define _engine_intervaltree_h_ + +#include "vec.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AbstractInterval +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class AbstractInterval { +public: + + virtual ~AbstractInterval() { + } + virtual T getMin() const = 0; + virtual T getMax() const = 0; + virtual int compareTo(const AbstractInterval& right) const = 0; + + static T getSmallest(); // Smallest possible value. + static T getLargest(); // Largest possible value. 
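+
+  // Note: these are only declared here; every T used with the tree needs its own
+  // specialization (a uint64_t specialization follows below).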
+}; + +template<> inline uint64_t AbstractInterval::getSmallest() { + return 0; +} + +template<> inline uint64_t AbstractInterval::getLargest() { + return ULLONG_MAX; +} + +template class SimpleInterval : public AbstractInterval { +private: + + const T low_; + const T high_; + +public: + + SimpleInterval(const T& low, const T& high) : low_(low), high_(high) { + } + + T getMin() const override { + return low_; + } + + T getMax() const override { + return high_; + } + + int compareTo(const AbstractInterval& right) const { + if (getMin() == right.getMin()) { + return 0; + } else if (getMin() < right.getMin()) { + return -1; + } else { + return 1; + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IntervalTreeNode +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class IntervalTree; +template class IntervalTreeNode { + friend class IntervalTree; + +private: + + AbstractInterval* interval_; + T maxHigh_; + bool red_; + IntervalTreeNode* left_; + IntervalTreeNode* right_; + IntervalTreeNode* parent_; + +public: + + IntervalTreeNode(); + IntervalTreeNode(AbstractInterval* interval); + ~IntervalTreeNode(); + AbstractInterval* getInterval(); +}; + +template inline IntervalTreeNode::IntervalTreeNode() { +} + +template inline IntervalTreeNode::IntervalTreeNode(AbstractInterval* interval) + : interval_(interval), maxHigh_(interval->getMax()), left_(0), right_(0), parent_(0) { +} + +template inline IntervalTreeNode::~IntervalTreeNode() { +} + +template inline AbstractInterval* IntervalTreeNode::getInterval() { + return interval_; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IntervalTree +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class IntervalTree { +private: + + IntervalTreeNode* nil_; + IntervalTreeNode* root_; + +private: + + void leftRotate(IntervalTreeNode* node); + void rightRotate(IntervalTreeNode* node); + void insertHelp(IntervalTreeNode* node); + void TreePrintHelper(IntervalTreeNode* node) const; + void fixMaxHigh(IntervalTreeNode* node); + void removeFixUp(IntervalTreeNode* node); + static bool overlap(const T low1, const T high1, const T low2, const T high2); + void findOverlaps(IntervalTreeNode* x, const AbstractInterval& interval, SYSpVector, 256>& intervals) const; + IntervalTreeNode* getSuccessorOf(IntervalTreeNode* node) const; + void deleteNode(IntervalTreeNode* node); + +#ifndef NDEBUG + void checkMaxHighFields(IntervalTreeNode* x) const; + T checkMaxHighFieldsHelper(IntervalTreeNode* y, const T currentHigh, T match) const; + void checkAssumptions() const; + void checkOrder(IntervalTreeNode* x) const; +#endif + +public: + + IntervalTree(); + ~IntervalTree(); + + void remove(const AbstractInterval& interval); + void insert(AbstractInterval* interval); + IntervalTreeNode* find(const AbstractInterval& interval) const; + void findOverlaps(const AbstractInterval& interval, SYSpVector, 256>& intervals) const; + void clear(); + IntervalTreeNode* getMin() const; + IntervalTreeNode* getNext(IntervalTreeNode* node) const; + bool getMin(T& v) const; + bool getMax(T& v) const; +}; + +template inline IntervalTree::IntervalTree() { + nil_ = new IntervalTreeNode(); + nil_->left_ = nil_; + nil_->right_ = nil_; + nil_->parent_ = nil_; + nil_->red_ = false; + nil_->maxHigh_ = AbstractInterval::getSmallest(); + nil_->interval_ = 
new SimpleInterval(AbstractInterval::getSmallest(), AbstractInterval::getSmallest()); + root_ = new IntervalTreeNode(); + root_->parent_ = nil_; + root_->left_ = nil_; + root_->right_ = nil_; + root_->maxHigh_ = AbstractInterval::getLargest(); + root_->interval_ = new SimpleInterval(AbstractInterval::getLargest(), AbstractInterval::getLargest()); + root_->red_ = false; +} + +template inline void IntervalTree::deleteNode(IntervalTreeNode* node) { + if (node->left_ != nil_) { + deleteNode(node->left_); + } + if (node->right_ != nil_) { + deleteNode(node->right_); + } +} + +template inline IntervalTree::~IntervalTree() { + delete root_->interval_; + deleteNode(root_); + delete nil_->interval_; + delete nil_; +} + +template inline void IntervalTree::clear() { + if (root_->left_ != nil_) { + deleteNode(root_->left_); + root_->left_ = nil_; + } +} + +template inline void IntervalTree::leftRotate(IntervalTreeNode* x) { + IntervalTreeNode* y = x->right_; + x->right_ = y->left_; + if (y->left_ != nil_) { + y->left_->parent_ = x; + } + y->parent_ = x->parent_; + if (x == x->parent_->left_) { + x->parent_->left_ = y; + } else { + x->parent_->right_ = y; + } + y->left_ = x; + x->parent_ = y; + x->maxHigh_ = std::max(x->left_->maxHigh_, std::max(x->right_->maxHigh_, x->interval_->getMax())); + y->maxHigh_ = std::max(x->maxHigh_, std::max(y->right_->maxHigh_, y->interval_->getMax())); +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::rightRotate(IntervalTreeNode* y) { + IntervalTreeNode* x = y->left_; + y->left_ = x->right_; + if (nil_ != x->right_) { + x->right_->parent_ = y; + } + x->parent_ = y->parent_; + if (y == y->parent_->left_) { + y->parent_->left_ = x; + } else { + y->parent_->right_ = x; + } + x->right_ = y; + y->parent_ = x; + y->maxHigh_ = std::max(y->left_->maxHigh_, std::max(y->right_->maxHigh_, y->interval_->getMax())); + x->maxHigh_ = std::max(x->left_->maxHigh_, std::max(y->maxHigh_, x->interval_->getMax())); +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::insertHelp(IntervalTreeNode* z) { + z->left_ = nil_; + z->right_ = nil_; + IntervalTreeNode* x = root_->left_; + IntervalTreeNode* y = root_; + while (x != nil_) { + y = x; + if (x->interval_->compareTo(*z->interval_) > 0) { + x = x->left_; + } else { + x = x->right_; + } + } + z->parent_ = y; + if (y == root_ || y->interval_->compareTo(*z->interval_) > 0) { + y->left_ = z; + } else { + y->right_ = z; + } + assert(!nil_->red_); + assert(nil_->maxHigh_ == AbstractInterval::getSmallest()); +} + +template inline void IntervalTree::fixMaxHigh(IntervalTreeNode* x) { + while (x != root_) { + x->maxHigh_ = std::max(x->interval_->getMax(), std::max(x->left_->maxHigh_, x->right_->maxHigh_)); + x = x->parent_; + } +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::insert(AbstractInterval* interval) { + IntervalTreeNode* x = new IntervalTreeNode(interval); + insertHelp(x); + fixMaxHigh(x->parent_); + assert(x != nil_); + x->red_ = true; + while (x->parent_->red_) { + if (x->parent_ == x->parent_->parent_->left_) { + IntervalTreeNode* y = x->parent_->parent_->right_; + if (y->red_) { + x->parent_->red_ = false; + y->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + x = x->parent_->parent_; + } else { + if (x == x->parent_->right_) { + x = x->parent_; + leftRotate(x); + } + x->parent_->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + 
rightRotate(x->parent_->parent_); + } + } else { + IntervalTreeNode* y = x->parent_->parent_->left_; + if (y->red_) { + x->parent_->red_ = false; + y->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + x = x->parent_->parent_; + } else { + if (x == x->parent_->left_) { + x = x->parent_; + rightRotate(x); + } + x->parent_->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + leftRotate(x->parent_->parent_); + } + } + } + root_->left_->red_ = false; +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline IntervalTreeNode* IntervalTree::find(const AbstractInterval& interval) const { + IntervalTreeNode* x = root_->left_; + while (x != nil_) { + const int cmp = x->interval_->compareTo(interval); + if (cmp > 0) { + x = x->left_; + } else if (cmp < 0) { + x = x->right_; + } else { + return x; + } + } + return 0; +} + +// STATIC +template inline bool IntervalTree::overlap(const T low1, const T high1, const T low2, const T high2) { + if (low1 <= low2) { + return low2 <= high1; + } else { + return low1 <= high2; + } +} + +template inline void IntervalTree::findOverlaps(IntervalTreeNode* x, const AbstractInterval& interval, SYSpVector, 256>& intervals) const { + if (x == nil_) { + return; + } + const T low = interval.getMin(); + if (low > x->maxHigh_) { + return; + } + findOverlaps(x->left_, interval, intervals); + const T high = interval.getMax(); + if (IntervalTree::overlap(low, high, x->interval_->getMin(), x->interval_->getMax())) { + intervals.append(x->interval_); + } + if (high < x->interval_->getMin()) { + return; + } + findOverlaps(x->right_, interval, intervals); +} + +template inline void IntervalTree::findOverlaps(const AbstractInterval& interval, SYSpVector, 256>& intervals) const { + findOverlaps(root_->left_, interval, intervals); +} + +template inline IntervalTreeNode* IntervalTree::getSuccessorOf(IntervalTreeNode* x) const { + IntervalTreeNode* y = x->right_; + if (y != nil_) { + while (y->left_ != nil_) { + y = y->left_; + } + return y; + } else { + y = x->parent_; + while (x == y->right_) { + x = y; + y = y->parent_; + } + if (y == root_) { + return nil_; + } + return y; + } +} +template inline void IntervalTree::removeFixUp(IntervalTreeNode* x) { + IntervalTreeNode* rootLeft = root_->left_; + while (!x->red_ && rootLeft != x) { + if (x == x->parent_->left_) { + IntervalTreeNode* w = x->parent_->right_; + if (w->red_) { + w->red_ = false; + assert(x->parent_ != nil_); + x->parent_->red_ = true; + leftRotate(x->parent_); + w = x->parent_->right_; + } + if (!w->right_->red_ && !w->left_->red_) { + assert(w != nil_); + w->red_ = true; + x = x->parent_; + } else { + if (!w->right_->red_) { + w->left_->red_ = false; + assert(w != nil_); + w->red_ = true; + rightRotate(w); + w = x->parent_->right_; + } + assert(!x->parent_->red_ || w != nil_); + w->red_ = x->parent_->red_; + x->parent_->red_ = false; + w->right_->red_ = false; + leftRotate(x->parent_); + x = rootLeft; + } + } else { + IntervalTreeNode* w = x->parent_->left_; + if (w->red_) { + w->red_ = false; + assert(x->parent_ != nil_); + x->parent_->red_ = true; + rightRotate(x->parent_); + w = x->parent_->left_; + } + if (!w->right_->red_ && !w->left_->red_) { + assert(w != nil_); + w->red_ = true; + x = x->parent_; + } else { + if (!w->left_->red_) { + w->right_->red_ = false; + assert(w != nil_); + w->red_ = true; + leftRotate(w); + w = x->parent_->left_; + } + assert(!x->parent_->red_ || w != nil_); + w->red_ = x->parent_->red_; + 
x->parent_->red_ = false; + w->left_->red_ = false; + rightRotate(x->parent_); + x=rootLeft; + } + } + } + x->red_ = false; +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::remove(const AbstractInterval& interval) { + IntervalTreeNode* z = find(interval); + if (z == 0) { + // Not found: do nothing. + return; + } + IntervalTreeNode* y = (z->left_ == nil_ || z->right_ == nil_) ? z : getSuccessorOf(z); + IntervalTreeNode* x = (y->left_ == nil_) ? y->right_ : y->left_; + x->parent_ = y->parent_; + if (root_ == x->parent_) { + root_->left_ = x; + } else { + if (y == y->parent_->left_) { + y->parent_->left_ = x; + } else { + y->parent_->right_ = x; + } + } + if (y != z) { + assert(y != nil_); + y->maxHigh_ = AbstractInterval::getSmallest(); + y->left_ = z->left_; + y->right_ = z->right_; + y->parent_ = z->parent_; + z->left_->parent_ = y; + z->right_->parent_ = y; + if (z == z->parent_->left_) { + z->parent_->left_ = y; + } else { + z->parent_->right_ = y; + } + fixMaxHigh(x->parent_); + if (!y->red_) { + assert(!z->red_ || z != nil_); + y->red_ = z->red_; + removeFixUp(x); + } else { + assert(!z->red_ || z != nil_); + y->red_ = z->red_; + } + z->left_ = nil_; + z->right_ = nil_; + delete z; + } else { + fixMaxHigh(x->parent_); + if (!y->red_) { + removeFixUp(x); + } + y->left_ = nil_; + y->right_ = nil_; + delete y; + } +#ifndef NDEBUG + checkAssumptions(); + assert(find(interval) == 0); +#endif +} + +template inline IntervalTreeNode* IntervalTree::getMin() const { + IntervalTreeNode* x = root_->left_; + if (x == nil_) { + return 0; + } else { + while (x->left_ != nil_) { + x = x->left_; + } + return x; + } +} + +template inline IntervalTreeNode* IntervalTree::getNext(IntervalTreeNode* node) const { + IntervalTreeNode* x = getSuccessorOf(node); + return x == nil_ ? 0 : x; +} + +template inline bool IntervalTree::getMin(T& v) const { + IntervalTreeNode* x = root_->left_; + if (x == nil_) { + return false; + } else { + while (x->left_ != nil_) { + x = x->left_; + } + v = x->interval_->getMin(); + return true; + } +} + +template inline bool IntervalTree::getMax(T& v) const { + if (root_->left_ == nil_) { + return false; + } else { + v = root_->left_->maxHigh_; + return true; + } +} + +#ifndef NDEBUG + +template inline T IntervalTree::checkMaxHighFieldsHelper(IntervalTreeNode* y, const T currentHigh, T match) const { + if (y != nil_) { + match = checkMaxHighFieldsHelper(y->left_, currentHigh, match) ? 1 : match; + assert(y->interval_->getMax() <= currentHigh); + if (y->interval_->getMax() == currentHigh) { + match = 1; + } + match = checkMaxHighFieldsHelper(y->right_, currentHigh, match) ? 
1 : match; + } + return match; +} + +template inline void IntervalTree::checkMaxHighFields(IntervalTreeNode* x) const { + if (x != nil_) { + checkMaxHighFields(x->left_); + assert(checkMaxHighFieldsHelper(x, x->maxHigh_, 0) > 0); + checkMaxHighFields(x->right_); + } +} + +template inline void IntervalTree::checkOrder(IntervalTreeNode* x) const { + if (x != nil_) { + assert(x->left_ == nil_ || x->interval_->compareTo(*x->left_->interval_) > 0); + checkOrder(x->left_); + assert(x->right_ == nil_ || x->interval_->compareTo(*x->right_->interval_) <= 0); + checkOrder(x->right_); + } +} + +template inline void IntervalTree::checkAssumptions() const { + assert(nil_->interval_->getMin() == AbstractInterval::getSmallest()); + assert(nil_->interval_->getMax() == AbstractInterval::getSmallest()); + assert(nil_->maxHigh_ == AbstractInterval::getSmallest()); + assert(root_->interval_->getMin() == AbstractInterval::getLargest()); + assert(root_->interval_->getMax() == AbstractInterval::getLargest()); + assert(root_->maxHigh_ == AbstractInterval::getLargest()); + assert(nil_->red_ == false); + assert(root_->red_ == false); +#if 0 + // This can be very expensive if there are a lot of nodes! + checkMaxHighFields(root_->left_); + checkOrder(root_->left_); +#endif +} + +#endif + +} + +#endif /* #ifndef _engine_intervaltree_h_ */ diff --git a/storage/sparrow/api/ipaddress.cc b/storage/sparrow/api/ipaddress.cc new file mode 100644 index 000000000000..fe037c727dcc --- /dev/null +++ b/storage/sparrow/api/ipaddress.cc @@ -0,0 +1,371 @@ +/* + IP address. +*/ + +#include "ipaddress.h" + +#include +#include +#include +#include + +namespace IvFunctions { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IpAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Prints an IP address. Returns the length of the output string, or 0 in case of error. +uint32_t IpAddress::print(char* buffer) const { + if (!isValid()) { + return 0; + } + if (isV4()) { + const uint8_t* bytes = (length_ == 4 ? bytes_ : bytes_ + 12); + return sprintf(buffer, "%u.%u.%u.%u", bytes[0], bytes[1], bytes[2], bytes[3]); + } else { + bool ok = false; + + // Check for IPv6-compatible, IPv4-mapped, and IPv4-translated addresses. + if (bytes_[0] == 0 && bytes_[1] == 0 && bytes_[2] == 0 && bytes_[3] == 0 + && bytes_[4] == 0 && bytes_[5] == 0 && bytes_[6] == 0 && bytes_[7] == 0 + && (bytes_[12] != 0 || bytes_[13] != 0)) { + if (bytes_[8] == 0 && bytes_[9] == 0 + && ((bytes_[10] == 0 && bytes_[11] == 0) || (bytes_[10] == 0xff && bytes_[11] == 0xff))) { + // Compatible or mapped. + sprintf(buffer, "::%s%u.%u.%u.%u", bytes_[10] == 0 ? "" : "ffff:", + bytes_[12], bytes_[13], bytes_[14], bytes_[15]); + ok = true; + } + else if (bytes_[8] == 0xff && bytes_[9] == 0xff && bytes_[10] == 0 && bytes_[11] == 0) { + // Compatible or mapped. + sprintf(buffer, "::ffff:0:%u.%u.%u.%u", bytes_[12], bytes_[13], bytes_[14], bytes_[15]); + ok = true; + } + } + if (!ok) { + int maxFirst = 0; + int maxLast = 0; + int curFirst = 0; + int curLast = 0; + for (int i = 0; i < 8; ++i) { + if (bytes_[i * 2] == 0 && bytes_[i * 2 + 1] == 0) { + // Extend current substring. + curLast = i + 1; + + // Check if current is now largest. + if (curLast - curFirst > maxLast - maxFirst) { + maxFirst = curFirst; + maxLast = curLast; + } + } else { + // Start a new substring. + curFirst = i + 1; + curLast = i + 1; + } + } + + // Ignore a substring of length 1. 
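+      // (A single zero word is written out as such, e.g. 1:0:2:3:4:5:6:7. For a
+      // longer run, e.g. 2001:0:0:1:0:0:0:1, the longest run of zero words is the
+      // one that is collapsed, giving 2001:0:0:1::1.)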
+ if (maxLast - maxFirst <= 1) { + maxFirst = maxLast = 0; + } + + // Write colon-separated words. + // A double-colon takes the place of the longest string of zeroes. + // All zeroes is just "::". + char* tmpBuffer = buffer; + for (int i = 0; i < 8; ++i) { + // Skip over string of zeroes. + if (maxFirst <= i && i < maxLast) { + tmpBuffer += sprintf(tmpBuffer, "::"); + i = maxLast - 1; + continue; + } + + // Need colon separator if not at beginning. + if (i != 0 && i != maxLast) { + *tmpBuffer++ = ':'; + } + tmpBuffer += sprintf(tmpBuffer, "%x", (bytes_[i * 2] << 8) | bytes_[i * 2 + 1]); + } + } + return static_cast(strlen(buffer)); + } +} + +// Parses an IP address. Returns true if OK. +bool IpAddress::parse(const char* buffer, uint32_t length) { + + // Consider the input string is not null-terminated. + // Try IPv4 first. + if (length >= 7 && length <= 15) { + int n = 0; + int byte = 0; + bool ok = true; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isdigit(c)) { + n = n * 10 + static_cast(c - '0'); + if (n > 255) { + ok = false; + break; + } + } + if (c == '.' || i + 1 == length) { + if (byte == 4) { + ok = false; + break; + } + bytes_[byte++] = n; + n = 0; + } else if (!isdigit(c)) { + ok = false; + break; + } + } + if (ok && byte == 4) { + length_ = 4; + return true; + } + } + + // Not IPv4: try IPv6. + enum + { + start, + inNumber, + afterDoubleColon + } state = start; + + bool result = true; + int number = -1; + bool sawHex = false; + int numColons = 0, numDots = 0; + int sawDoubleColon = 0; + int i = 0; + uint32_t l = 0; + while (l < length) { + char c = buffer[l]; + switch (state) { + case start: + if (c == ':') { + // This case only handles double-colon at the beginning. + if (numDots > 0 || numColons > 0 || buffer[1] != ':') { + goto finish; + } + sawDoubleColon = 1; + numColons = 2; + bytes_[i * 2] = 0; // Pretend it was 0:: + bytes_[i * 2 + 1] = 0; + i++; + l++; + state = afterDoubleColon; + break; + } + [[fallthrough]]; + case afterDoubleColon: + if (isdigit(c)) { + sawHex = false; + number = l; + state = inNumber; + } else if (isxdigit(c)) { + if (numDots > 0) { + goto finish; + } + sawHex = true; + number = l; + state = inNumber; + } else { + goto finish; + } + break; + case inNumber: + if (isdigit(c)) { + // Remain in InNumber state. + } else if (isxdigit(c)) { + if (numDots > 0) { + goto finish; + } + sawHex = true; + // Remain in InNumber state. + } + else if (c == ':') { + if (numDots > 0) { + goto finish; + } + if (numColons > 6) { + goto finish; + } + if (buffer[l + 1] == ':') { + if (sawDoubleColon || numColons > 5) { + goto finish; + } + sawDoubleColon = numColons + 1; + numColons += 2; + l++; + state = afterDoubleColon; + } else { + numColons++; + state = start; + } + } + else if (c == '.') { + if (sawHex || numDots > 2 || numColons > 6) { + goto finish; + } + numDots++; + state = start; + } else { + goto finish; + } + break; + } + // If we finished a number, parse it. + if (state != inNumber && number != -1) { + // Note either numDots > 0 or numColons > 0, + // because something terminated the number. + if (numDots == 0) { + int n = parseHex(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[i * 2] = (n >> 8) & 0xff; + bytes_[i * 2 + 1] = n & 0xff; + i++; + } else { + int n = parseInt(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[2 * i + numDots-1] = static_cast(n); + } + } + l++; + } + +finish: + + // Check that we have a complete address. 
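+  // A complete address has either seven colons ("1:2:3:4:5:6:7:8"), or a double colon
+  // standing in for the missing words ("::1"), or a trailing dotted quad that counts
+  // for the last two words ("::ffff:10.0.0.1" ends with three dots, so numDots == 3).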
+ if (numDots == 0) { + } else if (numDots == 3) { + numColons++; + } else { + result = false; + } + if (result) { + if (sawDoubleColon) { + } else if (numColons == 7) { + } else { + result = false; + } + if (result) { + // Parse the last number, if necessary. + if (state == inNumber) { + if (numDots == 0) { + int n = parseHex(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[i * 2] = (n >> 8) & 0xff; + bytes_[i * 2 + 1] = n & 0xff; + } else { + int n = parseInt(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[2 * i + numDots] = static_cast(n); + } + } else if (state == afterDoubleColon) { + bytes_[i * 2] = 0; // pretend it was ::0 + bytes_[i * 2 + 1] = 0; + } else { + result = false; + } + + // Insert zeroes for the double-colon, if necessary. + if (result && sawDoubleColon) { + memmove(&bytes_[(sawDoubleColon + 8 - numColons) * 2], + &bytes_[sawDoubleColon * 2], (numColons - sawDoubleColon) * 2); + memset(&bytes_[sawDoubleColon * 2], 0, (8 - numColons) * 2); + } + } + } + return result; +} + +// Helper methods to parse decimal (0..255) and hex numbers (0..ffff). +// They return -1 in case of error. +// STATIC +int IpAddress::parseInt(const char* buffer, uint32_t length) { + int n = 0; + bool hasDigit = false; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isdigit(c)) { + hasDigit = true; + n = n * 10 + static_cast(c - '0'); + } else { + if (isspace(c)) { + if (hasDigit) { + break; + } + } else { + break; + } + } + } + return (hasDigit && n <= 255) ? n : -1; +} + +// STATIC +int IpAddress::parseHex(const char* buffer, uint32_t length) { + int n = 0; + bool hasDigit = false; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isxdigit(c)) { + hasDigit = true; + int hex = c >= 'a' && c <= 'f' ? c - 'a' + 10 : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - '0'; + n = n * 16 + hex; + } else { + if (isspace(c)) { + if (hasDigit) { + break; + } + } else { + break; + } + } + } + return (hasDigit && n <= 65535) ? n : -1; +} + +bool IpAddress::applyMask(const IpAddress& mask) { + if (isV4()) { + if (!mask.isV4()) { + return false; + } + uint8_t* bytes = (length_ == 4 ? bytes_ : bytes_ + 12); + const uint8_t* mbytes = (mask.length_ == 4 ? mask.bytes_ : mask.bytes_ + 12); + for (uint32_t i = 0; i < 4; ++i) { + bytes[i] &= mbytes[i]; + } + return true; + } else { + for (uint32_t i = 0; i < mask.length_; ++i) { + bytes_[i] &= mask.bytes_[i]; + } + return true; + } +} + +void IpAddress::makeMask(int bits) { + memset(bytes_, 0, length_); + int i = 0; + while (bits > 0) { + bytes_[i++] = bits >= 8 ? 0xff : (((1 << bits) - 1) << (7 - bits)); + bits -= 8; + } +} + +} diff --git a/storage/sparrow/api/ipaddress.h b/storage/sparrow/api/ipaddress.h new file mode 100644 index 000000000000..4bb92ea4eb05 --- /dev/null +++ b/storage/sparrow/api/ipaddress.h @@ -0,0 +1,74 @@ +/* + IP address. 
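+
+  A minimal usage sketch, with buffer sizes assumed (46 bytes is comfortably enough
+  for any textual form printed by this class):
+
+    uint8_t bytes[16];
+    IpAddress addr(bytes, 16);
+    if (addr.parse("::ffff:10.0.0.1", 15)) {
+      char text[46];
+      addr.print(text);
+    }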
+*/ + +#ifndef _functions_ipaddress_h_ +#define _functions_ipaddress_h_ + + +#include +#include + +namespace IvFunctions { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IpAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IpAddress { +private: + + uint8_t* bytes_; + uint32_t length_; + +private: + + static int parseInt(const char* buffer, uint32_t length); + static int parseHex(const char* buffer, uint32_t length); + +public: + + IpAddress() : bytes_(0), length_(0) { + } + + IpAddress(const uint8_t* bytes, uint32_t length) : bytes_(const_cast(bytes)), length_(length) { + } + + bool isValid() const { + return bytes_ != 0 && (length_ == 4 || length_ == 16); + } + + bool isV4() const { + if (length_ == 4) { + return true; + } else { + for (int i = 0; i < 12; ++i) { + if (bytes_[i] != 0) { + return false; + } + } + return true; + } + } + + bool isPrivate() const { + if (isV4()) { + const uint32_t check = length_ == 4 ? ((bytes_[0] << 8) | bytes_[1]) : ((bytes_[12] << 8) | bytes_[13]); + return (check & 0xff00) == 0xa00 || check == 0xc0a8 || (check >> 4) == 0xac1; + } else { + return bytes_[0] == static_cast(0xfd); + } + } + + uint32_t print(char* buffer) const; + + bool parse(const char* buffer, uint32_t length); + + bool applyMask(const IpAddress& mask); + + void makeMask(int bits); +}; + +} + +#endif /* #ifndef _functions_ipaddress_h_ */ diff --git a/storage/sparrow/api/list.h b/storage/sparrow/api/list.h new file mode 100644 index 000000000000..0dd2abbc8e11 --- /dev/null +++ b/storage/sparrow/api/list.h @@ -0,0 +1,1005 @@ +/* + Non-intrusive single-linked list and intrusive double-linked lists. + */ + +#ifndef _spw_api_list_h_ +#define _spw_api_list_h_ + +#include "api_assert.h" + +namespace Sparrow { + +// constant for "not found" +#ifndef SYS_NPOS +#define SYS_NPOS (~(static_cast(0))) +#endif + +template class SYSslink { +public: + + SYSslink(const T& object, SYSslink* next); + SYSslink* getNext() const; + void setNext(SYSslink* next); + const T& getObject() const; + T& getObject(); + void setObject(const T& object); + +protected: + + T object_; + SYSslink* next_; +}; + +template inline SYSslink::SYSslink(const T& object, SYSslink* next) : object_(object), next_(next) { +} + +template inline SYSslink* SYSslink::getNext() const { + return next_; +} + +template inline void SYSslink::setNext(SYSslink* next) { + next_ = next; +} + +template inline const T& SYSslink::getObject() const { + return object_; +} + +template inline T& SYSslink::getObject() { + return object_; +} + +template inline void SYSslink::setObject(const T& object) { + object_ = object; +} + +// +// Default allocator for single-linked lists. +// +template class SYSslAllocator { +public: + + SYSslAllocator() { + } + SYSslink* acquire(const T& object, SYSslink* next) { + return new SYSslink(object, next); + } + void release(SYSslink* link) { + delete link; + } +}; + +// +// Pool allocator for single-linked lists. 
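+// Released links are put back on an internal free list and recycled by acquire(), so a
+// list that keeps inserting and removing entries does not go to the heap every time.
+// Hypothetical usage: pass it as the second template argument of the list, e.g.
+// SYSslist<int, SYSslPoolAllocator<int> > pooled;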
+// +template class SYSslPoolAllocator { +private: + + SYSslink* root_; + +public: + + SYSslPoolAllocator() : + root_(0) { + } + ~SYSslPoolAllocator() { + SYSslink* link = root_; + while (link != 0) { + SYSslink* next = link->getNext(); + delete link; + link = next; + } + } + SYSslink* acquire(const T& object, SYSslink* next) { + if (root_ == 0) { + SYSslink* l = new SYSslink(object, next); + if (l == 0) { + printf("SYShPoolAllocator::acquire: cannot allocate %llu bytes of memory", static_cast(sizeof(*l))); + } + return l; + } else { + SYSslink* link = root_; + root_ = root_->getNext(); + link->setObject(object); + link->setNext(next); + return link; + } + } + void release(SYSslink* link) { + link->setNext(root_); + root_ = link; + } +}; + +template class SYSslistIterator; + +template > class SYSslist: public A { + friend class SYSslistIterator ; + +public: + + // constructors + SYSslist(); + SYSslist(const SYSslist& right); + + // destructor + ~SYSslist(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T& first() const; + const T& last() const; + T& first(); + T& last(); + + // operations + void insert(const T& t); + void append(const T& t); + void prepend(const T& t); + void insertAt(const uint32_t index, const T& t); + bool remove(const T& t); + T removeAt(uint32_t index); + void removeLast(); + void removeFirst(); + uint32_t index(const T& t) const; + bool contains(const T& t) const; + bool find(const T& t, T& result) const; + void clear(); + void getAll(SYSslist& list) { + list.first_ = first_; + list.last_ = last_; + list.n_ = n_; + first_ = 0; + last_ = 0; + n_ = 0; + } + void appendAll(SYSslist& list) { + if (!list.isEmpty()) { + if (isEmpty()) { + first_ = list.first_; + last_ = list.last_; + } else { + last_->setNext(list.first_); + last_ = list.last_; + } + n_ += list.n_; + list.first_ = 0; + list.last_ = 0; + list.n_ = 0; + } + } + + /*void release(SYSslink* link) { + A::release( link ); + }*/ + + // copy + SYSslist& operator =(const SYSslist& right); + +protected: + + SYSslink* atPosition(uint32_t index) const; + +private: + + // equality not implemented + bool operator ==(const SYSslist& right) const; + +protected: + + SYSslink* first_; + SYSslink* last_; + uint32_t n_; +}; + +template inline uint32_t SYSslist::entries() const { + return n_; +} + +template inline bool SYSslist::isEmpty() const { + return (n_ == 0); +} + +template inline SYSslink* SYSslist::atPosition(uint32_t index) const { + SPW_dbgASSERT(index < n_); + SYSslink* sl = first_; + while (sl != 0) { + if (index-- == 0) { + return sl; + } + sl = sl->getNext(); + } + return 0; // not reached +} + +template inline void SYSslist::insert(const T& t) { + SYSslink* sl = this->acquire(t, 0); + if (n_ == 0) { + first_ = sl; + last_ = first_; + } else { + last_->setNext(sl); + last_ = sl; + } + n_++; +} + +template inline void SYSslist::append(const T& t) { + insert(t); +} + +template inline void SYSslist::insertAt(const uint32_t index, const T& t) { + SPW_dbgASSERT(index <= n_); + SYSslink* nsl = this->acquire(t, 0); + SYSslink* psl = index == 0 ? 
0 : atPosition(index - 1); + if (psl == 0) { + nsl->setNext(first_); + first_ = nsl; + if (n_ == 0) { + last_ = first_; + } + } else { + nsl->setNext(psl->getNext()); + psl->setNext(nsl); + if (psl == last_) { + last_ = nsl; + } + } + n_++; +} + +template inline void SYSslist::prepend(const T& t) { + insertAt(0, t); +} + +template inline T SYSslist::removeAt(uint32_t index) { + SPW_dbgASSERT(index < n_); + T result; + SYSslink* sl = first_; + if (n_ == 1) { + result = sl->getObject(); + this->release(sl); + first_ = 0; + last_ = 0; + n_ = 0; + } else { + SYSslink* psl = 0; + while (sl != 0) { + if (index-- == 0) { + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == first_) { + first_ = sl->getNext(); + } else if (sl == last_) { + last_ = psl; + } + n_--; + result = sl->getObject(); + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSslist::removeLast() { + SPW_dbgASSERT(n_ > 0); + removeAt(n_ - 1); +} + +template inline void SYSslist::removeFirst() { + SPW_dbgASSERT(n_ > 0); + removeAt(0); +} + +template inline bool SYSslist::remove(const T& t) { + bool result = false; + SYSslink* sl = first_; + if (n_ == 1) { + if (sl->getObject() == t) { + this->release(sl); + first_ = 0; + last_ = 0; + n_ = 0; + result = true; + } + } else if (n_ > 1) { + SYSslink* psl = 0; + while (sl != 0) { + if (sl->getObject() == t) { + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == first_) { + first_ = sl->getNext(); + } else if (sl == last_) { + last_ = psl; + } + n_--; + result = true; + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSslist::clear() { + SYSslink* sl = first_; + SYSslink* psl = 0; + while (sl != 0) { + psl = sl; + sl = sl->getNext(); + this->release(psl); + } + first_ = 0; + last_ = 0; + n_ = 0; +} + +template inline SYSslist::~SYSslist() { + clear(); +} + +template inline uint32_t SYSslist::index(const T& t) const { + uint32_t result = 0; + SYSslink* sl = first_; + while (sl != 0) { + if (sl->getObject() == t) { + return result; + } + sl = sl->getNext(); + result++; + } + return SYS_NPOS; +} + +template inline bool SYSslist::find(const T& t, T& result) const { + SYSslink* sl = first_; + while (sl != 0) { + if (sl->getObject() == t) { + result = sl->getObject(); + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline bool SYSslist::contains(const T& t) const { + return (index(t) != SYS_NPOS); +} + +template inline SYSslist::SYSslist() : first_(0), last_(0), n_(0) { +} + +template inline const T& SYSslist::operator [](const uint32_t index) const { + const SYSslink* sl = atPosition(index); + return sl->getObject(); +} + +template inline T& SYSslist::operator [](const uint32_t index) { + SYSslink* sl = atPosition(index); + return sl->getObject(); +} + +template inline const T& SYSslist::first() const { + SPW_dbgASSERT(n_ > 0); + return first_->getObject(); +} + +template inline const T& SYSslist::last() const { + SPW_dbgASSERT(n_ > 0); + return last_->getObject(); +} + +template inline T& SYSslist::first() { + SPW_dbgASSERT(n_ > 0); + return first_->getObject(); +} + +template inline T& SYSslist::last() { + SPW_dbgASSERT(n_ > 0); + return last_->getObject(); +} + +template > class SYSslistIterator : public A { +public: + + // constructor + SYSslistIterator(SYSslist& list); + + // operators + bool operator ++(); + bool operator ()(); + + // operations + void reset(); + const T& 
key() const; + T& key(); + bool remove(); + +private: + + // copy, assignment and equality are forbidden + SYSslistIterator(const SYSslistIterator& right); + SYSslistIterator& operator =(const SYSslistIterator& right); + bool operator ==(const SYSslistIterator& right) const; + +protected: + + SYSslist& list_; + SYSslink* psl_; + SYSslink* sl_; +}; + +template inline void SYSslistIterator::reset() { + psl_ = 0; + sl_ = 0; +} + +template inline SYSslistIterator::SYSslistIterator(SYSslist& list) : + list_(list) { + reset(); +} + +template inline bool SYSslistIterator::operator ++() { + // first time? + if (psl_ == 0 && sl_ == 0) { + sl_ = list_.first_; + } else if (sl_ != 0) { + psl_ = sl_; + sl_ = sl_->getNext(); + } + return (sl_ != 0); +} + +template inline bool SYSslistIterator::operator ()() { + return ++(*this); +} + +template inline const T& SYSslistIterator::key() const { + return sl_->getObject(); +} + +template inline T& SYSslistIterator::key() { + return sl_->getObject(); +} + +template bool SYSslistIterator::remove() { + if (sl_ != 0) { + if (list_.entries() == 1) { + list_.clear(); + psl_ = 0; + sl_ = 0; + } else if (sl_ == list_.first_) { + list_.removeAt(0); + psl_ = 0; + sl_ = list_.first_; + } else if (sl_ == list_.last_) { + sl_ = 0; + list_.removeLast(); + } else { + // remove current + psl_->setNext(sl_->getNext()); + this->release(sl_); + sl_ = psl_->getNext(); + list_.n_--; + } + return true; + } else { + return false; + } +} + +// copy operator/constructor for SYSslist: need iterator +template SYSslist& SYSslist::operator =(const SYSslist& right) { + clear(); + SYSslistIterator iterator((SYSslist&) right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template SYSslist::SYSslist(const SYSslist& right) : first_(0), last_(0), n_(0) { + *this = right; +} + +template class SYSpSlistIterator; + +template > class SYSpSlist: public SYSslist { + friend class SYSpSlistIterator ; + +public: + + // constructors + SYSpSlist(); + + // accessors + T* first() const; + T* last() const; + + // operations + T* remove(const T* t); + uint32_t index(const T* t) const; + bool contains(const T* t) const; + T* find(const T* t) const; + void clearAndDestroy(); + +private: + + // equality not implemented + bool operator ==(const SYSpSlist& right) const; +}; + +template inline SYSpSlist::SYSpSlist() : SYSslist() { +} + +template inline T* SYSpSlist::first() const { + return (this->isEmpty() ? 0 : this->first_->getObject()); +} + +template inline T* SYSpSlist::last() const { + return (this->isEmpty() ? 
0 : this->last_->getObject()); +} + +template inline T* SYSpSlist::remove(const T* t) { + T* result = 0; + SYSslink* sl = this->first_; + if (this->n_ == 1) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + this->release(sl); + this->first_ = 0; + this->last_ = 0; + this->n_ = 0; + } + } else if (this->n_ > 1) { + SYSslink* psl = 0; + while (sl != 0) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == this->first_) { + this->first_ = sl->getNext(); + } else if (sl == this->last_) { + this->last_ = psl; + } + this->n_--; + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSpSlist::clearAndDestroy() { + SYSslink* sl = this->first_; + SYSslink* psl = 0; + while (sl != 0) { + psl = sl; + sl = sl->getNext(); + T* object = psl->getObject(); + delete object; + this->release(psl); + } + this->first_ = 0; + this->last_ = 0; + this->n_ = 0; +} + +template inline uint32_t SYSpSlist::index(const T* t) const { + uint32_t result = 0; + SYSslink* sl = this->first_; + while (sl != 0) { + if (*sl->getObject() == *t) { + return result; + } + sl = sl->getNext(); + result++; + } + return SYS_NPOS; +} + +template inline T* SYSpSlist::find(const T* t) const { + T* result = 0; + SYSslink* sl = this->first_; + while (sl != 0) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + break; + } + sl = sl->getNext(); + } + return result; +} + +template inline bool SYSpSlist::contains(const T* t) const { + return (this->index(t) != SYS_NPOS); +} + +template > class SYSpSlistIterator { +public: + + // constructor + SYSpSlistIterator(SYSpSlist& list); + + // operators + bool operator ++(); + T* operator ()(); + + // operations + void reset(); + const T* key() const; + T* key(); + bool remove(); + +private: + + // copy, assignment and equality are forbidden + SYSpSlistIterator(const SYSpSlistIterator& right); + SYSpSlistIterator& operator =(const SYSpSlistIterator& right); + bool operator ==(const SYSpSlistIterator& right) const; + +protected: + + SYSpSlist& list_; + SYSslink* psl_; + SYSslink* sl_; +}; + +template inline void SYSpSlistIterator::reset() { + psl_ = 0; + sl_ = 0; +} + +template inline SYSpSlistIterator::SYSpSlistIterator(SYSpSlist& list) : + list_(list) { + reset(); +} + +template inline bool SYSpSlistIterator::operator ++() { + // first time? 
+ if (psl_ == 0 && sl_ == 0) { + sl_ = list_.first_; + } else if (sl_ != 0) { + psl_ = sl_; + sl_ = sl_->getNext(); + } + return (sl_ != 0); +} + +template inline T* SYSpSlistIterator::operator ()() { + if (++(*this)) { + return sl_->getObject(); + } + return 0; +} + +template inline const T* SYSpSlistIterator::key() const { + return sl_->getObject(); +} + +template inline T* SYSpSlistIterator::key() { + return sl_->getObject(); +} + +template inline bool SYSpSlistIterator::remove() { + if (sl_ != 0) { + if (list_.entries() == 1) { + list_.clear(); + psl_ = 0; + sl_ = 0; + } else if (sl_ == list_.first_) { + list_.removeAt(0); + psl_ = 0; + sl_ = list_.first_; + } else if (sl_ == list_.last_) { + sl_ = 0; + list_.removeLast(); + } else { + // remove current + psl_->setNext(sl_->getNext()); + this->release(sl_); + sl_ = psl_->getNext(); + list_.n_--; + } + return true; + } else { + return false; + } +} + +template class SYSidlink { +public: + + SYSidlink(); + +public: + + T* prev_; + T* next_; +}; + +template inline SYSidlink::SYSidlink() : + prev_(0), next_(0) { +} + +template class SYSidlistIterator; + +template class SYSidlist { + friend class SYSidlistIterator ; + +public: + + // constructor + SYSidlist(); + + // destructor + ~SYSidlist(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + T* last() const; + T* first() const; + + // operations + T* remove(T* t); + T* removeFirst(); + void prepend(T* t); + void append(T* t); + void clear() { + first_ = 0; + last_ = 0; + n_ = 0; + } + void append(SYSidlist& list) { + n_ += list.n_; + if (last_ == 0) { + first_ = list.first_; + last_ = list.last_; + } else { + last_->next_ = list.first_; + if (list.first_ != 0) { + list.first_->prev_ = last_; + } + if (list.last_ != 0) { + last_ = list.last_; + } + } + } + bool contains(T* t) const { + T* it = first_; + while (it != 0) { + if (it == t) { + return true; + } + it = it->next_; + } + return false; + } + + bool contains(const T& t) const { + T* it = first_; + while (it != 0) { + if (*it == t) { + return true; + } + it = it->next_; + } + return false; + } + +protected: + + // internal link/unlink operations + void unlink(T* t); + void link(T* t, T* right); + +private: + + // copy, assignment and equality are forbidden + SYSidlist(const SYSidlist& right); + SYSidlist& operator =(const SYSidlist& right); + bool operator ==(const SYSidlist& right) const; + +protected: + + T* first_; + T* last_; + uint32_t n_; +}; + +// remove t from list +template inline void SYSidlist::unlink(T* t) { + SPW_dbgASSERT(n_ > 0); + if (t->prev_ != 0) { + SPW_dbgASSERT(t->prev_->next_ == t); + t->prev_->next_ = t->next_; + } else { + SPW_dbgASSERT(first_ == t); + first_ = t->next_; + } + if (t->next_ != 0) { + SPW_dbgASSERT(t->next_->prev_ == t); + t->next_->prev_ = t->prev_; + } else { + SPW_dbgASSERT(last_ == t); + last_ = t->prev_; + } + t->prev_ = 0; + t->next_ = 0; + n_--; +} + +// link right to the right of t (list must not be empty) +template inline void SYSidlist::link(T* t, T* right) { + SPW_dbgASSERT(first_ != 0 && last_ != 0 && t != 0 && right != 0); + right->prev_ = t; + right->next_ = t->next_; + t->next_ = right; + right->next_->prev_ = right; + if (last_ == t) { + last_ = right; + } + n_++; +} + +template inline uint32_t SYSidlist::entries() const { + return n_; +} + +template inline bool SYSidlist::isEmpty() const { + return (n_ == 0); +} + +template inline T* SYSidlist::first() const { + return first_; +} + +template inline T* SYSidlist::last() const { + return last_; +} + +template 
inline void SYSidlist::append(T* t) { + if (last_ == 0) { + SPW_dbgASSERT(first_ == 0); + last_ = t; + first_ = t; + t->prev_ = 0; + t->next_ = 0; + } else { + SPW_dbgASSERT(first_ != 0 && last_->next_ == 0); + last_->next_ = t; + t->prev_ = last_; + t->next_ = 0; + last_ = t; + } + n_++; +} + +template inline void SYSidlist::prepend(T* t) { + if (last_ == 0) { + SPW_dbgASSERT(first_ == 0); + last_ = t; + first_ = t; + t->prev_ = 0; + t->next_ = 0; + } else { + first_->prev_ = t; + t->prev_ = 0; + t->next_ = first_; + first_ = t; + } + n_++; +} + +template inline T* SYSidlist::removeFirst() { + if (n_ == 0) { + return 0; + } else { + T* t = first_; + unlink(first_); + return t; + } +} + +// remove an element from the list (undefined results if the element is not in the list) +template inline T* SYSidlist::remove(T* t) { + unlink(t); + return t; +} + +template inline SYSidlist::~SYSidlist() { +} + +template inline SYSidlist::SYSidlist() : + first_(0), last_(0), n_(0) { +} + +template class SYSidlistIterator { +public: + + // constructor + SYSidlistIterator(SYSidlist& list); + + // operators + T* operator ++(); + T* operator --(); + T* operator ()(); + + // operations + void reset(); + T* key() const; + void insert(T* t); + +private: + + // copy, assignment and equality are forbidden + SYSidlistIterator(const SYSidlistIterator& right); + SYSidlistIterator& operator =(const SYSidlistIterator& right); + bool operator ==(const SYSidlistIterator& right) const; + +protected: + + SYSidlist& list_; + T* l_; +}; + +template inline void SYSidlistIterator::reset() { + l_ = 0; +} + +template inline SYSidlistIterator::SYSidlistIterator(SYSidlist& list) : + list_(list) { + reset(); +} + +template inline T* SYSidlistIterator::operator ++() { + l_ = (l_ == 0 ? list_.first_ : l_->next_); + return l_; +} + +template inline T* SYSidlistIterator::operator --() { + if (l_ != 0) { + l_ = l_->prev_; + } + return l_; +} + +template inline void SYSidlistIterator::insert(T* t) { + SPW_dbgASSERT(l_ != 0); + list_.link(l_, t); +} + +template inline T* SYSidlistIterator::operator ()() { + return ++(*this); +} + +template inline T* SYSidlistIterator::key() const { + return l_; +} + +} + +#endif /* #ifndef _spw_api_list_h_ */ diff --git a/storage/sparrow/api/lock.h b/storage/sparrow/api/lock.h new file mode 100644 index 000000000000..fea8c77855f4 --- /dev/null +++ b/storage/sparrow/api/lock.h @@ -0,0 +1,313 @@ +/* + Lock types. +*/ + +#ifndef _spw_api_lock_h_ +#define _spw_api_lock_h_ + +#include "my_sys.h" +#include "include/global.h" +#include "list.h" +#include "include/thr_mutex.h" +#include "include/thr_rwlock.h" + +namespace Sparrow { + +// Simple lock. 
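+// Thin wrapper around native_mutex_t. A Lock built with isStatic == true is only
+// registered in a global list; its mutex is created later by Lock::initializeStatics()
+// and destroyed by deinitializeStatics(), which lets such locks live at file scope.
+// Hypothetical usage sketch:
+//
+//   static Lock cacheLock(true, "cache");   // set up by Lock::initializeStatics()
+//   ...
+//   cacheLock.lock();
+//   ... // critical section
+//   cacheLock.unlock();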
+class Lock { +private: + + const char *m_name{nullptr}; + native_mutex_t lock_; + bool static_; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + +private: + + void initialize() { + native_mutex_init(&lock_, nullptr); + } + + void clear() { + if (m_name != nullptr) { + native_mutex_destroy(&lock_); + my_free(const_cast(m_name)); + m_name = nullptr; + } + } + +public: + + Lock(const bool isStatic, const char* name) : static_(isStatic) { + m_name = my_strdup(name, MYF(MY_FAE)); + if (static_) { + Lock::getStatics().append(this); + } else { + initialize(); + } + } + Lock& operator = (const Lock&) = delete; + Lock(const Lock&) = delete; + + static void initializeStatics() { + SYSslistIterator iterator(Lock::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(Lock::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~Lock() { + clear(); + } + + void lock() { + native_mutex_lock(&lock_); + } + + bool tryLock() { + if (native_mutex_trylock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + void unlock() { + native_mutex_unlock(&lock_); + } + + const char* getName() const { + return m_name; + } + + native_mutex_t* get() { + return &lock_; + } +}; + + +// Single writer, multiple readers lock. +class RWLock { +private: + + const char *m_name{nullptr}; + native_rw_lock_t lock_; + bool static_; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + + void initialize() { + native_rw_init(&lock_); + } + + void clear() { + if (m_name != nullptr) { + native_rw_destroy(&lock_); + my_free(const_cast(m_name)); + m_name = nullptr; + } + } + +public: + + RWLock(const bool isStatic, const char* name) : static_(isStatic) { + m_name = my_strdup(name, MYF(MY_FAE)); + if (static_) { + RWLock::getStatics().append(this); + } else { + initialize(); + } + } + + RWLock& operator = (const RWLock&) = delete; + RWLock(const RWLock&) = delete; + + static void initializeStatics() { + SYSslistIterator iterator(RWLock::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(RWLock::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~RWLock() { + clear(); + } + + void readLock() { + native_rw_rdlock(&lock_); + } + + void writeLock() { + native_rw_wrlock(&lock_); + } + + bool tryReadLock() { + if (native_rw_tryrdlock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + bool tryWriteLock() { + if (native_rw_trywrlock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + void unlock() { + native_rw_unlock(&lock_); + } +}; + + + + +// Simple lock guard. +class Guard { +private: + + Lock* lock_; + bool acquired_; + +public: + + Guard(Lock& lock, const bool doTry = false) : lock_(&lock) { + if (doTry) { + acquired_ = lock_->tryLock(); + } else { + lock_->lock(); + acquired_ = true; + } + } + + Guard() : lock_(0), acquired_(false) { + } + + ~Guard() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + } + } + + bool isAcquired() const { + return acquired_; + } + +private: + + Guard& operator = (const Guard&); + Guard(const Guard&); +}; + + +// Read lock guard. 
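+// RAII helper that takes the shared (read) side of an RWLock in its constructor and
+// releases it in its destructor; WriteGuard below is the exclusive counterpart.
+// Hypothetical usage sketch:
+//
+//   RWLock tableLock(false, "table");
+//   {
+//     ReadGuard guard(tableLock, true);   // true = tryReadLock(), may not acquire
+//     if (guard.isAcquired()) {
+//       ... // read shared state
+//     }
+//   }   // unlocked when the guard goes out of scope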
+class ReadGuard { +private: + + RWLock* lock_; + bool acquired_; + +public: + + ReadGuard(RWLock& lock, const bool doTry = false) : lock_(&lock) { + if (doTry) { + acquired_ = lock_->tryReadLock(); + } else { + lock_->readLock(); + acquired_ = true; + } + } + + ReadGuard() : lock_(0), acquired_(false) { + } + + ~ReadGuard() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + } + } + + bool isAcquired() const { + return acquired_; + } + +private: + + ReadGuard& operator = (const ReadGuard&); + ReadGuard(const ReadGuard&); +}; + +// Write lock guard. +class WriteGuard { +private: + + RWLock* lock_; + bool acquired_; + +public: + + WriteGuard(RWLock& lock, const bool doTry = false) : lock_(&lock) { + if (doTry) { + acquired_ = lock_->tryWriteLock(); + } else { + lock_->writeLock(); + acquired_ = true; + } + } + + WriteGuard() : lock_(0), acquired_(false) { + } + + void release() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + acquired_ = false; + } + } + + ~WriteGuard() { + release(); + } + + bool isAcquired() const { + return acquired_; + } + +private: + + WriteGuard& operator = (const WriteGuard&); + WriteGuard(const WriteGuard&); +}; + + +} + +#endif /* #ifndef _spw_api_lock_h_ */ diff --git a/storage/sparrow/api/memalloc.cc b/storage/sparrow/api/memalloc.cc new file mode 100644 index 000000000000..f745a3682a80 --- /dev/null +++ b/storage/sparrow/api/memalloc.cc @@ -0,0 +1,84 @@ +#include "memalloc.h" + +#include "include/global.h" +#include "api_assert.h" + +namespace Sparrow +{ + void my_free(void *ptr) + { + free(ptr); + } + + void *my_malloc(size_t size, myf my_flags) + { + void* point; + + /* Safety */ + if (!size) + size=1; + + point= malloc(size); + + if (point == NULL) + { + if (my_flags & MY_FAE) + exit(1); + } + else if (my_flags & MY_ZEROFILL) + memset(point, 0, size); + + return point; + } + + + void *my_realloc(void *oldpoint, size_t size, myf my_flags) + { + void *point; + + SPW_ASSERT(size > 0); + if (!oldpoint && (my_flags & MY_ALLOW_ZERO_PTR)) + return my_malloc(size, my_flags); + + if ((point= realloc(oldpoint, size)) == NULL) + { + if (my_flags & MY_FREE_ON_ERROR) + my_free(oldpoint); + if (my_flags & MY_HOLD_ON_ERROR) + return oldpoint; + } + return point; + } + + + void *my_memdup(const void *from, size_t length, myf my_flags) + { + void *ptr; + if ((ptr= my_malloc(length,my_flags)) != 0) + memcpy(ptr, from, length); + return ptr; + } + + + char *my_strdup(const char *from, myf my_flags) + { + char *ptr; + size_t length= strlen(from)+1; + if ((ptr= (char*) my_malloc(length, my_flags))) + memcpy(ptr, from, length); + return ptr; + } + + + char *my_strndup(const char *from, size_t length, myf my_flags) + { + char *ptr; + if ((ptr= (char*) my_malloc(length+1, my_flags))) + { + memcpy(ptr, from, length); + ptr[length]= 0; + } + return ptr; + } + +} \ No newline at end of file diff --git a/storage/sparrow/api/memalloc.h b/storage/sparrow/api/memalloc.h new file mode 100644 index 000000000000..4cb96607f9b4 --- /dev/null +++ b/storage/sparrow/api/memalloc.h @@ -0,0 +1,23 @@ +/* This is the include file that should be included 'first' in every C file. 
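+
+  The Sparrow::my_* functions below are plain malloc/free based counterparts of the
+  MySQL allocation helpers of the same names; see memalloc.cc for which myf flags
+  (MY_FAE, MY_ZEROFILL, MY_ALLOW_ZERO_PTR, MY_FREE_ON_ERROR, MY_HOLD_ON_ERROR) are
+  honored. A typical call mirrors the MySQL API, e.g.
+
+    char* copy = Sparrow::my_strdup(name, MYF(MY_FAE));   // exits instead of returning NULL
+    ...
+    Sparrow::my_free(copy);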
*/ +#ifndef _spw_api_memalloc_h +#define _spw_api_memalloc_h + +// Reuse type and constant definitions from MySQL +//#include +#include "my_sys.h" +//#include +//#include "m_string.h" + +//#define bzero(A,B) memset((A),0,(B)) + +namespace Sparrow +{ + void my_free(void *ptr); + void *my_malloc(size_t size, myf my_flags); + void *my_realloc(void *oldpoint, size_t size, myf my_flags); + void *my_memdup(const void *from, size_t length, myf my_flags); + char *my_strdup(const char *from, myf my_flags); + char *my_strndup(const char *from, size_t length, myf my_flags); +} + +#endif /* _spw_api_memalloc_h */ diff --git a/storage/sparrow/api/misc.h b/storage/sparrow/api/misc.h new file mode 100644 index 000000000000..8efdedc9e9f1 --- /dev/null +++ b/storage/sparrow/api/misc.h @@ -0,0 +1,303 @@ +/* + Miscellaneous types. +*/ + +#ifndef _spw_api_misc_h_ +#define _spw_api_misc_h_ + +#include "atomic.h" +//#include "serial.h" +//#include +#include "api_assert.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AutoPtr +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This class holds a pointer and automatically releases it when it goes +// out-of-scope. Similar to std::auto_ptr, without the evil owner bit. +template class AutoPtr { +private: + + T* ptr_; // The pointer we encapsulate. + + AutoPtr(const AutoPtr&); + AutoPtr& operator=(const AutoPtr&); + + void reset() { + if (ptr_ != 0) { + if (!ARRAY) { + delete ptr_; + } + else { + delete [] ptr_; + } + ptr_ = 0; + } + } + +public: + // Construction / destruction + AutoPtr() : ptr_ (0) { + } + explicit AutoPtr(T* ptr) : ptr_ (ptr) { + SPW_ASSERT(ptr_ != 0); + } + ~AutoPtr() { + reset(); + } + + // Pointer-like operators + // + // NOTE: While it may be very tempting to write an automatic conversion operator + // to T* here, it is usually considered an evil thing and may cause very subtle + // glitches that can make your life truly miserable, so it's better to use get(). + AutoPtr& operator=(T* ptr) { + reset(); + ptr_ = ptr; + return *this; + } + + bool operator==(const T* ptr) const { + return ptr_ == ptr; + } + bool operator!=(const T* ptr) const { + return ptr_ != ptr; + } + T& operator*() { + SPW_ASSERT(ptr_ != 0); + return *ptr_; + } + T* operator->() { + SPW_ASSERT(ptr_ != 0); + return ptr_; + } + T& operator[](const uint32_t index) { + SPW_ASSERT(ptr_ != 0); + return ptr_[index]; + } + T* get() { + return ptr_; + } + + // + // Releases the pointer we're holding, meaning the the caller becomes + // responsible of it's deletion. The released pointer is returned. + // + T* release() { + SPW_ASSERT(ptr_ != 0); + T* ptr = ptr_; + ptr_ = 0; + return ptr; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefCounter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Atomic reference counter. 
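+// Usage sketch (illustrative only, not used elsewhere in this patch):
+//
+//   RefCounter counter(1);          // one initial reference
+//   counter.acquire();              // count is now 2
+//   bool last = counter.release();  // false: count dropped back to 1
+//   last = counter.release();       // true: count reached 0, owner may clean up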
+class RefCounter { +public: + + RefCounter(const uint32_t n = 0): refs_ (n) { ; } + + void acquire() { Atomic::inc32(&refs_); } + bool release() { return Atomic::dec32(&refs_) == 0; } + + uint32_t refs() const { return refs_; } + + // prefix + uint32_t operator ++ () { return Atomic::inc32(&refs_); } + uint32_t operator -- () { return Atomic::dec32(&refs_); } + + // postfix + uint32_t operator ++ (int) { return Atomic::inc32(&refs_) - 1; } + uint32_t operator -- (int) { return Atomic::dec32(&refs_) + 1; } + + // add/sub + RefCounter& operator += (const int v) { Atomic::add32(&refs_, v); return *this; } + RefCounter& operator -= (const int v) { Atomic::add32(&refs_, -v); return *this; } + + // reset/set + void reset(const uint32_t n = 0) { refs_ = n; } + RefCounter& operator = (const uint32_t n) { refs_ = n; return *this; } + +private: + + volatile uint32_t refs_; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefCounted +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Base class that provides a reference counting mechanism. +class RefCounted { +private: + + mutable RefCounter count_; // The current reference count. + +public: + RefCounted() : count_ (0) { + } + + // Copy constructor. Sets the count to 0. + RefCounted(const RefCounted&) : count_ (0) { + } + + // Destructor. Ensures nobody still holds a reference! + virtual ~RefCounted() { + SPW_ASSERT(count_.refs () == 0); + } + + RefCounted& operator = (const RefCounted&) = default; + + void acquireRef() { + ++count_; + } + + bool releaseRef() { + return --count_ == 0; + } + + void resetRef(uint32_t n) { + count_.reset(n); + } + + uint32_t refs() const { + return count_.refs(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefPtr +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Smart pointer class that automatically takes and releases a reference on +// the object it holds. Usable with any kind of reference counted object. +// +// In order to be usable with this class, T must inherit from RefCounted. +template class RefPtr { +private: + + T* ptr_; // The pointer we hold. + + // Takes a reference if we're holding a pointer. + void connect() { + if (ptr_ != 0) { + ptr_->acquireRef(); + } + } + + // Releases the reference on the object we may hold. + void disconnect() { + if (ptr_ != 0 && ptr_->releaseRef()) { + delete ptr_; + } + } + +public: + + RefPtr() : ptr_ (0) { + } + + explicit RefPtr(T* ptr) : ptr_ (ptr) { + connect (); + } + + // Copy constructor. + RefPtr(const RefPtr& ptr) : ptr_ (ptr.ptr_) { + connect (); + } + + // Copy constructor from other refptr types + template RefPtr(const RefPtr& ptr) : ptr_ (ptr.get ()) { + connect (); + } + + // Destructor. + ~RefPtr() { + disconnect (); + } + + // Returns the pointer we encapsulate. + T* get() const { + return ptr_; + } + T& operator*() const { + SPW_ASSERT(ptr_ != 0); + return *ptr_; + } + T* operator->() const { + SPW_ASSERT(ptr_ != 0); + return ptr_; + } + operator T*() const { + return ptr_; + } + + // Automatic conversion to other refptr types + template operator RefPtr() const { + return RefPtr(ptr_); + } + + // Assignment operator. + RefPtr& operator = (T* ptr) { + if (ptr_ == ptr) { + return *this; + } + disconnect (); + ptr_ = ptr; + connect (); + return *this; + } + + // Assignment operator. 
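+  // Both the wrapper identity and the raw pointer are checked first, so
+  // self-assignment never releases the last reference before re-acquiring it.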
+ RefPtr& operator = (const RefPtr& ptr) { + if (this == &ptr || ptr_ == ptr.ptr_) { + return *this; + } + disconnect (); + ptr_ = ptr.ptr_; + connect (); + return *this; + } + + // Comparison operators. + bool operator < (const RefPtr& ptr) const { + if (ptr_ == 0) { + return ptr.ptr_ != 0; + } + if (ptr.ptr_ == 0) { + return false; + } + if (ptr_ == ptr.ptr_) { + return false; + } + return *ptr_ < *ptr.ptr_; + } + bool operator == (const RefPtr& ptr) const { + if (ptr_ == 0) { + return ptr.ptr_ == 0; + } + if (ptr.ptr_ == 0) { + return false; + } + if (ptr_ == ptr.ptr_) { + return true; + } + return *ptr_ == *ptr.ptr_; + } + + void reset() { + ptr_ = 0; + } +}; + + +} + +#endif /* #ifndef _spw_api_misc_h_ */ diff --git a/storage/sparrow/api/sema.h b/storage/sparrow/api/sema.h new file mode 100644 index 000000000000..2d64f3d2f77b --- /dev/null +++ b/storage/sparrow/api/sema.h @@ -0,0 +1,60 @@ +/* + Semaphore. +*/ + +#ifndef _spw_api_sema_h_ +#define _spw_api_sema_h_ + +#include "cond.h" +#include "misc.h" +#include "str.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sema +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Sema { +private: + + Cond cond_; + uint32_t volatile count_; + +public: + + Sema(const char* name, const uint32_t count = 0) : cond_(false, (Str(name) + Str("::cond_")).c_str()), count_(count) { + } + + ~Sema() { + } + + void post(const uint32_t count = 1) { + Guard guard(cond_.getLock()); + count_ += count; + cond_.signalAll(true); + } + + bool wait(const uint64_t milliseconds, const bool all = false) { + Guard guard(cond_.getLock()); + while (count_ == 0) { + if (!cond_.wait(milliseconds, true)) { + return false; + } + } + if (all) { + count_ = 0; + } else { + count_--; + } + return true; + } + + bool wait(const bool all = false) { + return wait(0, all); + } +}; + +} + +#endif /* #ifndef _spw_api_sema_h_ */ diff --git a/storage/sparrow/api/serial.cc b/storage/sparrow/api/serial.cc new file mode 100644 index 000000000000..0da0c48b344e --- /dev/null +++ b/storage/sparrow/api/serial.cc @@ -0,0 +1,164 @@ +#include "my_sys.h" +#include "serial.h" +#include "str.h" + +#ifdef _WIN32 +#pragma warning(disable:4355) +#else +#include +#endif + +#include "my_io.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// ByteBuffers require page-aligned memory to perform e.g. direct I/O. 
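+// Minimal allocation sketch (illustrative; real callers normally go through
+// IOBuffer or ByteBufferGuard, which pair mmap()/munmap() automatically):
+//
+//   const uint32_t size = 1 << 20;            // 1 MiB
+//   uint8_t* raw = ByteBuffer::mmap(size);    // exits the process on failure
+//   ByteBuffer buffer(raw, size);
+//   buffer << static_cast<uint32_t>(42);
+//   ByteBuffer::munmap(raw, size);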
+// STATIC +uint8_t* ByteBuffer::mmap(const uint32_t size) { +#ifdef _WIN32 + uint8_t* buffer = static_cast(VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); +#else + uint8_t* buffer = reinterpret_cast(::mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0)); + if (buffer == MAP_FAILED) { + buffer = 0; + } +#endif + if (buffer == 0) { + SparrowException e = SparrowException::create(true, SPW_API_OUT_OF_MEMORY, "Cannot allocate %u bytes", size); + e.toLog(); + exit(1); + } + return buffer; +} + +// STATIC +bool ByteBuffer::munmap(uint8_t* buffer, const uint32_t size) { +#ifdef _WIN32 + const BOOL result = VirtualFree(buffer, 0, MEM_RELEASE); +#else + const bool result = ::munmap(reinterpret_cast(buffer), size) == 0; +#endif + if (!result) { + SparrowException e = SparrowException::create(true, SPW_API_FAILED, "Cannot free %u bytes at address %p", size, buffer); + e.toLog(); + exit(1); + } + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// HeapBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/*HeapBuffer::HeapBuffer() : ByteBuffer(new uint8_t[1024], 1024, this) { +}*/ + +HeapBuffer::HeapBuffer(const uint32_t limit /* = 1024 */) + : ByteBuffer(new uint8_t[limit], limit, this) { +} + +HeapBuffer& HeapBuffer::operator = ( const HeapBuffer& buffer ) { + if ( &buffer == this ) return *this; + + delete [] data_; + pos_ = 0; + + data_ = new uint8_t[buffer.limit_]; + memcpy(data_, buffer.data_, buffer.limit_); + limit_ = buffer.limit_; + return *this; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SocketReader::SocketReader(my_socket socketId, ByteBuffer& buffer) _THROW_(SparrowException) + : ByteBuffer(buffer, this), socket_(socketId), bytesToRead_(buffer.limit()), bytesRead_(0) { + overflow(); +} + +void SocketReader::overflow() _THROW_(SparrowException) { + const int length = static_cast(bytesToRead_) - bytesRead_; + if (length == 0) { + return; + } else if (length < 0) { + throw SparrowException("Reached end of stream"); + } + int received = recv(socket_, reinterpret_cast(getData() + bytesRead_), length, 0); +#ifdef _WIN32 + if (GetLastError() == WSAECONNRESET) { + // Treat connection reset as a normal close. + received = 0; + } +#endif + if (received == -1) { + throw SparrowException::create(true, SPW_API_SOCKET_READ_ERR, "Error while reading data from socket"); + } else if (received == 0) { + throw SparrowException("Connection closed", false, SPW_API_SOCKET_CONN_CLOSED); + } + bytesRead_ += received; + limit(bytesRead_); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint32_t SocketWriter::size_ = 16 * 1024 * 1024; + +SocketWriter::SocketWriter(my_socket socketId) + : ByteBuffer(ByteBuffer::mmap(SocketWriter::size_), SocketWriter::size_, this), socket_(socketId) +{ + if ( socketId == INVALID_SOCKET ) + throw SparrowException( "Invalid socket. 
Can't use it to send data.", true, SPW_API_SOCKET_CONN_CLOSED ); +} + +SocketWriter::~SocketWriter() { + flush(); + ByteBuffer::munmap(getData(), SocketWriter::size_); +} + +void SocketWriter::send(const ByteBuffer& v) _THROW_(SparrowException) { + + if ( socket_ == INVALID_SOCKET ) + throw SparrowException( "Invalid socket. Can't send data.", true, SPW_API_SOCKET_CONN_CLOSED ); + + // Sends currently buffered data + flush(); + position(0); + + // Sends the content of ByteBuffer v directly (does not make a copy into our internal buffer before). + const uint32_t limit = v.limit(); + // If empty buffer, no need to make another send call + if ( limit > 0 ) { + const int sent = ::send(socket_, reinterpret_cast(v.getData()), static_cast(limit), 0); + if (sent == -1) { + PRINT_DBUG("[spw_Connection::process] send failed"); + throw SparrowException::create(true, SPW_API_SOCKET_WRITE_ERR, "Error while writing data to socket"); + } + } +} + +void SocketWriter::flush() _THROW_(SparrowException) +{ + if ( socket_ == INVALID_SOCKET ) + throw SparrowException( "Invalid socket. Can't flush data buffer.", true, SPW_API_SOCKET_CONN_CLOSED ); + + uint32_t length = 0; + while (length < position()) { + const int sent = ::send(socket_, reinterpret_cast(getData() + length), static_cast(position() - length), 0); + if (sent == -1) { + PRINT_DBUG("[spw_Connection::process] send failed"); + throw SparrowException::create(true, SPW_API_SOCKET_WRITE_ERR, "Error while writing data to socket"); + } + length += sent; + } +} + +} diff --git a/storage/sparrow/api/serial.h b/storage/sparrow/api/serial.h new file mode 100644 index 000000000000..a13cad78a222 --- /dev/null +++ b/storage/sparrow/api/serial.h @@ -0,0 +1,742 @@ +#ifndef _spw_api_serial_h_ +#define _spw_api_serial_h_ + +#include "include/exception.h" +#include "interval.h" +#include "vec.h" +#include "hash.h" +#include "misc.h" + + + +// Endianness must match Sparrow's +#define SPARROW_LITTLE_ENDIAN 1 + +// Marshalling/unmarshalling macros. SLOW macros are used when memory crosses the buffer limit, +// FAST macros are used otherwise. + +// Macros for little endian ordering. 
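+// Worked example (assume uint32_t V = 0x11223344): FAST_STORE4_L writes the
+// bytes 44 33 22 11 at P, i.e. least significant byte first, whatever the host
+// byte order; FAST_LOAD4_L reverses the operation. The SLOW variants produce
+// the same layout one put()/get() at a time, so a value that straddles the end
+// of the buffer can still trigger overflow() safely between bytes.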
+ +#define SLOW_STORE2_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); } while(0) +#define SLOW_STORE4_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 24)); } while(0) +#define SLOW_STORE8_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 24)); put((uint8_t)((V) >> 32)); \ + put((uint8_t)((V) >> 40)); put((uint8_t)((V) >> 48)); put((uint8_t)((V) >> 56)); } while(0) +#define SLOW_LOAD2_L(V) do { (V) = get(); V |= (get() << 8); } while(0) +#define SLOW_LOAD4_L(V) do { (V) = get(); V |= (get() << 8); V |= (get() << 16); V |= (get() << 24); } while(0) +#define SLOW_LOAD8_L(V) do { (V) = (uint64_t)get(); V |= ((uint64_t)get() << 8); V |= ((uint64_t)get() << 16); V |= ((uint64_t)get() << 24); \ + V |= ((uint64_t)get() << 32); V |= ((uint64_t)get() << 40); V |= ((uint64_t)get() << 48); V |= ((uint64_t)get() << 56); } while(0) +#ifdef WORDS_BIGENDIAN +#define FAST_STORE2_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); } while(0) +#define FAST_STORE4_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); \ + *((P) + 2) = (uint8_t)((V) >> 16); *((P) + 3) = (uint8_t)((V) >> 24); } while(0) +#define FAST_STORE8_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); \ + *((P) + 2) = (uint8_t)((V) >> 16); *((P) + 3) = (uint8_t)((V) >> 24); *((P) + 4) = (uint8_t)((V) >> 32); \ + *((P) + 5) = (uint8_t)((V) >> 40); *((P) + 6) = (uint8_t)((V) >> 48); *((P) + 7) = (uint8_t)((V) >> 56); } while(0) +#define FAST_LOAD2_L(P, V) do { uint16_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 1); *(ptemp + 1) = *(P); (V) = vtemp; } while(0) +#define FAST_LOAD4_L(P, V) do { uint32_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 3); *(ptemp + 1) = *((P) + 2); *(ptemp + 2) = *((P) + 1); *(ptemp + 3) = *(P); \ + (V) = vtemp; } while(0) +#define FAST_LOAD8_L(P, V) do { uint64_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 7); *(ptemp + 1) = *((P) + 6); *(ptemp + 2) = *((P) + 5); *(ptemp + 3) = *((P) + 4); \ + *(ptemp + 4) = *((P) + 3); *(ptemp + 5) = *((P) + 2); *(ptemp + 6) = *((P) + 1); *(ptemp + 7) = *(P); \ + (V) = vtemp; } while(0) +#else +#define FAST_STORE2_L(P,V) do { *((uint16_t*)(P)) = (uint16_t)(V); } while(0) +#define FAST_STORE4_L(P,V) do { *((uint32_t*)(P)) = (uint32_t)(V); } while(0) +#define FAST_STORE8_L(P,V) do { *((uint64_t*)(P)) = (uint64_t)(V); } while(0) +#define FAST_LOAD2_L(P,V) do { V = *((uint16_t*)(P)); } while(0) +#define FAST_LOAD4_L(P,V) do { V = *((uint32_t*)(P)); } while(0) +#define FAST_LOAD8_L(P,V) do { V = *((uint64_t*)(P)); } while(0) +#endif + +// Macros for big endian ordering. 
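+// Same idea with the opposite layout: for V = 0x11223344, FAST_STORE4_B writes
+// 11 22 33 44 (most significant byte first) on either kind of host.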
+ +#define SLOW_STORE2_B(V) do { put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_STORE4_B(V) do { put((uint8_t)((V) >> 24)); put((uint8_t)((V) >> 16)); \ + put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_STORE8_B(V) do { put((uint8_t)((V) >> 56)); put((uint8_t)((V) >> 48)); \ + put((uint8_t)((V) >> 40)); put((uint8_t)((V) >> 32)); put((uint8_t)((V) >> 24)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_LOAD2_B(V) do { (V) = (get() << 8); V |= get(); } while(0) +#define SLOW_LOAD4_B(V) do { (V) = (get() << 24); V |= (get() << 16); V |= (get() << 8); V |= get(); } while(0) +#define SLOW_LOAD8_B(V) do { (V) = ((uint64_t)get() << 56); V |= ((uint64_t)get() << 48); V |= ((uint64_t)get() << 40); \ + V |= ((uint64_t)get() << 32); V |= ((uint64_t)get() << 24); V |= ((uint64_t)get() << 16); V |= ((uint64_t)get() << 8); V |= (uint64_t)get(); } while(0) +#ifdef WORDS_BIGENDIAN +#define FAST_STORE2_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 2); } while(0) +#define FAST_STORE4_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 4); } while(0) +#define FAST_STORE8_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 8); } while(0) +#define FAST_LOAD2_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 2); } while(0) +#define FAST_LOAD4_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 4); } while(0) +#define FAST_LOAD8_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 8); } while(0) +#else +#define FAST_STORE2_B(P,V) do { *(P) = (uint8_t)((V) >> 8); *((P) + 1) = (uint8_t)(V); } while(0) +#define FAST_STORE4_B(P,V) do { *(P) = (uint8_t)((V) >> 24); *((P) + 1) = (uint8_t)((V) >> 16); \ + *((P) + 2) = (uint8_t)((V) >> 8); *((P) + 3) = (uint8_t)(V); } while(0) +#define FAST_STORE8_B(P,V) do { *(P) = (uint8_t)((V) >> 56); *((P) + 1) = (uint8_t)((V) >> 48); \ + *((P) + 2) = (uint8_t)((V) >> 40); *((P) + 3) = (uint8_t)((V) >> 32); *((P) + 4) = (uint8_t)((V) >> 24); \ + *((P) + 5) = (uint8_t)((V) >> 16); *((P) + 6) = (uint8_t)((V) >> 8); *((P) + 7) = (uint8_t)(V); } while(0) +#define FAST_LOAD2_B(P, V) do { uint16_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 1); *(ptemp + 1) = *(P); (V) = vtemp; } while(0) +#define FAST_LOAD4_B(P, V) do { uint32_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 3); *(ptemp + 1) = *((P) + 2); *(ptemp + 2) = *((P) + 1); *(ptemp + 3) = *(P); \ + (V) = vtemp; } while(0) +#define FAST_LOAD8_B(P, V) do { uint64_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 7); *(ptemp + 1) = *((P) + 6); *(ptemp + 2) = *((P) + 5); *(ptemp + 3) = *((P) + 4); \ + *(ptemp + 4) = *((P) + 3); *(ptemp + 5) = *((P) + 2); *(ptemp + 6) = *((P) + 1); *(ptemp + 7) = *(P); \ + (V) = vtemp; } while(0) +#endif + +// Macros to encode Sparrow data. 
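+// The generic STORE/LOAD macros below simply resolve to the _L or _B variants
+// according to SPARROW_LITTLE_ENDIAN, so ByteBuffer serializes to the wire
+// format expected by the Sparrow server regardless of the host byte order.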
+ +#ifdef SPARROW_LITTLE_ENDIAN +#define SLOW_STORE2(V) SLOW_STORE2_L(V) +#define SLOW_STORE4(V) SLOW_STORE4_L(V) +#define SLOW_STORE8(V) SLOW_STORE8_L(V) +#define SLOW_LOAD2(V) SLOW_LOAD2_L(V) +#define SLOW_LOAD4(V) SLOW_LOAD4_L(V) +#define SLOW_LOAD8(V) SLOW_LOAD8_L(V) +#define FAST_STORE2(P,V) FAST_STORE2_L(P,V) +#define FAST_STORE4(P,V) FAST_STORE4_L(P,V) +#define FAST_STORE8(P,V) FAST_STORE8_L(P,V) +#define FAST_LOAD2(P, V) FAST_LOAD2_L(P, V) +#define FAST_LOAD4(P, V) FAST_LOAD4_L(P, V) +#define FAST_LOAD8(P, V) FAST_LOAD8_L(P, V) +#else +#define SLOW_STORE2(V) SLOW_STORE2_B(V) +#define SLOW_STORE4(V) SLOW_STORE4_B(V) +#define SLOW_STORE8(V) SLOW_STORE8_B(V) +#define SLOW_LOAD2(V) SLOW_LOAD2_B(V) +#define SLOW_LOAD4(V) SLOW_LOAD4_B(V) +#define SLOW_LOAD8(V) SLOW_LOAD8_B(V) +#define FAST_STORE2(P,V) FAST_STORE2_B(P,V) +#define FAST_STORE4(P,V) FAST_STORE4_B(P,V) +#define FAST_STORE8(P,V) FAST_STORE8_B(P,V) +#define FAST_LOAD2(P, V) FAST_LOAD2_B(P, V) +#define FAST_LOAD4(P, V) FAST_LOAD4_B(P, V) +#define FAST_LOAD8(P, V) FAST_LOAD8_B(P, V) +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Abstract class to handle buffer overflows. +class ByteBufferOverflow { +public: + + virtual ~ByteBufferOverflow() { + } + virtual void overflow() _THROW_(SparrowException) = 0; + virtual bool end() const = 0; +}; + +class ByteBuffer : public RefCounted { +protected: + + uint8_t* data_; + uint32_t limit_; + uint32_t pos_; + ByteBufferOverflow* overflow_; + +private: + + void overflow() _THROW_(SparrowException); + uint8_t get(); + void put(const uint8_t v); + +public: + + ByteBuffer(); + ByteBuffer(const uint8_t* data, const uint32_t limit); + ByteBuffer(uint8_t* data, const uint32_t limit, ByteBufferOverflow* overflow = 0); + ByteBuffer(const ByteBuffer& buffer, ByteBufferOverflow* overflow); + + uint8_t* getData(); + uint8_t* getCurrentData(); + const uint8_t* getData() const; + const uint8_t* getCurrentData() const; + void position(const uint32_t pos); + uint32_t position() const; + uint32_t limit() const; + void limit(const uint32_t newLimit); + bool end() const; + void advance(uint32_t offset); + uint32_t remaining() const; + bool hasRemaining() const; + void flip(); + + ByteBuffer& operator << (const uint8_t v); + ByteBuffer& operator << (const int8_t v); + ByteBuffer& operator << (const uint16_t v); + ByteBuffer& operator << (const int16_t v); + ByteBuffer& operator << (const uint32_t v); + ByteBuffer& operator << (const int32_t v); + ByteBuffer& operator << (const uint64_t v); + ByteBuffer& operator << (const int64_t v); + ByteBuffer& operator << (const double v); + ByteBuffer& operator << (const bool v); + ByteBuffer& operator << (const ByteBuffer& v); + ByteBuffer& operator << (const char* v); + + ByteBuffer& operator >> (uint8_t& v); + ByteBuffer& operator >> (int8_t& v); + ByteBuffer& operator >> (uint16_t& v); + ByteBuffer& operator >> (int16_t& v); + ByteBuffer& operator >> (uint32_t& v); + ByteBuffer& operator >> (int32_t& v); + ByteBuffer& operator >> (uint64_t& v); + ByteBuffer& operator >> (int64_t& v); + ByteBuffer& operator >> (double& v); + ByteBuffer& operator >> (bool& v); + ByteBuffer& operator >> (ByteBuffer& v); + + static void initialize(); + static uint8_t* mmap(const uint32_t size); + static bool munmap(uint8_t* buffer, const uint32_t size); +}; + +inline void 
ByteBuffer::overflow() _THROW_(SparrowException) { + if (overflow_ == 0) { + throw SparrowException::create(false, SPW_API_FAILED, "Buffer overflow (limit=%u bytes)", limit_); + } else { + overflow_->overflow(); + } +} + +inline uint8_t ByteBuffer::get() { + if (pos_ >= limit_) { + overflow(); + } + return data_[pos_++]; +} + +inline void ByteBuffer::put(const uint8_t v) { + if (pos_ >= limit_) { + overflow(); + } + data_[pos_++] = v; +} + +inline ByteBuffer::ByteBuffer() + : data_(NULL), limit_(0), pos_(0), overflow_(0) { +} + +inline ByteBuffer::ByteBuffer(const uint8_t* data, const uint32_t limit) + : data_(const_cast(data)), limit_(limit), pos_(0), overflow_(0) { +} + +inline ByteBuffer::ByteBuffer(uint8_t* data, const uint32_t limit, ByteBufferOverflow* overflow /* = 0 */) + : data_(data), limit_(limit), pos_(0), overflow_(overflow) { +} + +inline ByteBuffer::ByteBuffer(const ByteBuffer& buffer, ByteBufferOverflow* overflow) { + data_ = buffer.data_; + limit_ = buffer.limit_; + pos_ = buffer.pos_; + overflow_ = overflow; +} + +inline uint8_t* ByteBuffer::getData() { + return data_; +} + +inline uint8_t* ByteBuffer::getCurrentData() { + return data_ + pos_; +} + +inline const uint8_t* ByteBuffer::getData() const { + return data_; +} + +inline const uint8_t* ByteBuffer::getCurrentData() const { + return data_ + pos_; +} + +inline void ByteBuffer::position(const uint32_t pos) { + pos_ = pos; +} + +inline uint32_t ByteBuffer::position() const { + return pos_; +} + +inline uint32_t ByteBuffer::limit() const { + return limit_; +} + +inline void ByteBuffer::limit(const uint32_t newLimit) { + limit_ = newLimit; +} + +inline bool ByteBuffer::end() const { + return overflow_ == 0 ? (pos_ == limit_) : overflow_->end(); +} + +inline void ByteBuffer::advance(uint32_t offset) { + while (offset > 0) { + if (pos_ >= limit_) { + overflow(); + } + const uint32_t length = std::min(limit_ - pos_, offset); + pos_ += length; + offset -= length; + } +} + +inline uint32_t ByteBuffer::remaining() const { + return limit_ - pos_; +} + +inline bool ByteBuffer::hasRemaining() const { + return pos_ < limit_; +} + +inline void ByteBuffer::flip() { + limit_ = pos_; + pos_ = 0; +} + +inline ByteBuffer& ByteBuffer::operator << (const uint8_t v) { + put(v); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int8_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint16_t v) { + const uint32_t npos = pos_ + 2; + if (npos > limit_) { + SLOW_STORE2(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE2(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int16_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint32_t v) { + const uint32_t npos = pos_ + 4; + if (npos > limit_) { + SLOW_STORE4(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE4(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int32_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint64_t v) { + const uint32_t npos = pos_ + 8; + if (npos > limit_) { + SLOW_STORE8(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE8(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int64_t v) { + return *this << static_cast(v); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +inline ByteBuffer& 
ByteBuffer::operator << (const double v) { + return *this << *reinterpret_cast(&v); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +inline ByteBuffer& ByteBuffer::operator << (const bool v) { + return *this << static_cast(v ? 1 : 0); +} + +inline ByteBuffer& ByteBuffer::operator << (const ByteBuffer& v) { + const uint32_t limit = v.limit_; + uint32_t pos = v.pos_; + while (pos < limit) { + if (pos_ >= limit_) { + overflow(); + } + const uint32_t length = std::min(limit_ - pos_, limit - pos); + memcpy(data_ + pos_, v.data_ + pos, length); + pos_ += length; + pos += length; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const char* v) { + return *this << ByteBuffer(reinterpret_cast(v), static_cast(strlen(v))); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint8_t& v) { + v = get(); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int8_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint16_t& v) { + const uint32_t npos = pos_ + 2; + if (npos > limit_) { + SLOW_LOAD2(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD2(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int16_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint32_t& v) { + const uint32_t npos = pos_ + 4; + if (npos > limit_) { + SLOW_LOAD4(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD4(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int32_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint64_t& v) { + const uint32_t npos = pos_ + 8; + if (npos > limit_) { + SLOW_LOAD8(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD8(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int64_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (double& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (bool& v) { + uint8_t b; + *this >> b; + v = (b != 0); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (ByteBuffer& v) { + while (v.pos_ < v.limit_) { + if (pos_ >= limit_) { + overflow(); + } + const uint32_t length = std::min(limit_ - pos_, v.limit_ - v.pos_); + memcpy(v.data_ + v.pos_, data_ + pos_, length); + pos_ += length; + v.pos_ += length; + } + return *this; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, Interval& interval) { + T low; + bool lowerSet; + buffer >> lowerSet; + if (lowerSet) { + buffer >> low; + } + T up; + bool upperSet; + buffer >> upperSet; + if (upperSet) { + buffer >> up; + } + bool lowerIncluded, upperIncluded; + buffer >> lowerIncluded >> upperIncluded; + interval = Interval(lowerSet ? &low : 0, upperSet ? 
&up : 0, lowerIncluded, upperIncluded); + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const Interval& interval) { + const T* low = interval.getLow(); + buffer << (low != 0); + if (low != 0) { + buffer << *low; + } + const T* up = interval.getUp(); + buffer << (up != 0); + if (up != 0) { + buffer << *up; + } + buffer << interval.isLowerIncluded() << interval.isUpperIncluded(); + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSvector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << v[i]; + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSvector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T t; + buffer >> t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpVector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << *(v[i]); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpVector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T* t = new T(); + buffer >> *t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpSortedVector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << *(v[i]); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpSortedVector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T* t = new T(); + buffer >> *t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYShash& h) { + buffer << h.entries(); + SYShashIterator iterator(h); + while (++iterator) { + buffer << iterator.key(); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYShash& h) { + uint32_t entries; + buffer >> entries; + for (uint32_t i = 0; i < entries; ++i) { + T entry; + buffer >> entry; + h.insert(entry); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpHash& h) { + buffer << h.entries(); + SYSpHashIterator iterator(h); + while (++iterator) { + buffer << *iterator.key(); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpHash& h) { + uint32_t entries; + buffer >> entries; + for (uint32_t i = 0; i < entries; ++i) { + T* entry = new T(); + buffer >> *entry; + h.insert(entry); + } + return buffer; +} + +typedef RefPtr RefByteBuffer; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ByteBufferGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Byte buffer guard. 
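+// RAII wrapper around a page-aligned buffer. Usage sketch (illustrative only):
+//
+//   {
+//     ByteBufferGuard guard(64 * 1024);        // mmap'ed on construction
+//     guard.get() << static_cast<uint64_t>(7);
+//     guard.get().flip();                      // switch to reading
+//   }                                          // munmap'ed by the destructor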
+class ByteBufferGuard { +private: + + const uint32_t size_; + ByteBuffer buffer_; + +public: + + ByteBufferGuard(const uint32_t size) : size_(size), buffer_(ByteBuffer::mmap(size), size) { + } + + ~ByteBufferGuard() { + ByteBuffer::munmap(buffer_.getData(), size_); + } + + ByteBuffer& get() { + return buffer_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IOBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IOBuffer : public ByteBuffer { +public: + IOBuffer(const uint32_t size) : ByteBuffer( ByteBuffer::mmap(size), size ) { + } + + ~IOBuffer() { + ByteBuffer::munmap(data_, limit_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// HeapBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class HeapBuffer : public ByteBuffer, public ByteBufferOverflow { +public: + + HeapBuffer(const uint32_t limit=1024); + + virtual ~HeapBuffer() { + delete [] data_; + } + + // Makes a copy of buffer + HeapBuffer& operator = (const HeapBuffer& buffer); + + void overflow() override _THROW_(SparrowException) { + uint8_t* save = data_; + data_ = new uint8_t[limit_ * 2]; + memcpy(data_, save, limit_); + limit_ *= 2; + delete [] save; + } + + bool end() const override { + return false; + } +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketReader : public ByteBuffer, public ByteBufferOverflow { +private: + + my_socket socket_; + const uint32_t bytesToRead_; + int bytesRead_; + +public: + + SocketReader(my_socket socketId, ByteBuffer& buffer) _THROW_(SparrowException); + + virtual ~SocketReader() { + } + + void overflow() override _THROW_(SparrowException); + + bool end() const override { + return bytesRead_ == static_cast(bytesToRead_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketWriter : public ByteBuffer, public ByteBufferOverflow { +private: + + static const uint32_t size_; + + my_socket socket_; + +public: + + SocketWriter(my_socket socketId); + + virtual ~SocketWriter(); + + // Sends the content of ByteBuffer v directly (does not make a copy into our internal buffer before). + void send (const ByteBuffer& v) _THROW_(SparrowException); + + void overflow() override _THROW_(SparrowException) { + flush(); + position(0); + } + + void flush() _THROW_(SparrowException); + + bool end() const override { + return false; // No EOF when writing. + } +}; + +} + +#endif /* #ifndef _spw_api_serial_h_ */ diff --git a/storage/sparrow/api/socketutil.cc b/storage/sparrow/api/socketutil.cc new file mode 100644 index 000000000000..41366a85a5f3 --- /dev/null +++ b/storage/sparrow/api/socketutil.cc @@ -0,0 +1,247 @@ +/* + Socket utilities. 
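+
+  SocketUtil::initialize() is called once from Sparrow::initialize() before any
+  socket is created: it starts Winsock on Windows, probes IPv6 support with a
+  dummy socket and opens the UDP "stop" socket used to wake the listener thread
+  on shutdown.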
+*/ + +#include "socketutil.h" +#include "../functions/ipaddress.h" + +#ifndef _WIN32 +#include +#include "my_io.h" +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketUtil +////////////////////////////////////////////////////////////////////////////////////////////////////// + +using namespace IvFunctions; + +namespace Sparrow +{ + +SocketAddress SocketAddress::anyAddr_ = SocketUtil::getAddress("127.0.0.1", 0); + +my_socket SocketUtil::stopSocket_ = INVALID_SOCKET; +SocketAddress SocketUtil::stopSocketAddress_; +bool SocketUtil::v6_ = false; + +// STATIC +void SocketUtil::initialize() _THROW_(SparrowException) { +#ifdef _WIN32 + // Initialize Winsock 2.2. + WSADATA wsaData; + WSAStartup(MAKEWORD(2, 2), &wsaData); +#endif + + // Checks whether IPv6 is supported by trying to create a dummy IPv6 socket. + my_socket socketId = socket(AF_INET6, SOCK_STREAM, IPPROTO_IPV6); + if (socketId != INVALID_SOCKET) { + closesocket(socketId); + v6_ = true; + } + + // Create the stop socket. + try { + stopSocket_ = SocketUtil::create(SOCK_DGRAM, SocketUtil::getAddress("127.0.0.1", 0)); + stopSocketAddress_ = SocketUtil::getAddress(stopSocket_); + } catch( const SparrowException& ) { + // Ignore error: without stop socket, shutdown will be slower - not a big deal. + } +} + +// Creates and binds a socket of the given type to the given address. +// STATIC +my_socket SocketUtil::create(int type, const SocketAddress& src_addr) + _THROW_(SparrowException) { + // Create socket. + my_socket socketId = socket(src_addr.isV6() ? AF_INET6 : AF_INET, type, 0); + if (socketId == INVALID_SOCKET) { + throw SparrowException::create(true, SPW_API_FAILED, "Cannot create socket"); + } + + int dummy = 1; + setsockopt(socketId, SOL_SOCKET, SO_REUSEADDR, (char*)&dummy, sizeof(dummy)); + + if (bind(socketId, src_addr.getSockAddr(), src_addr.getSockAddrLength()) != 0) { + closesocket(socketId); + throw SparrowException::create(true, SPW_API_FAILED, "Cannot bind socket to address %s", src_addr.print().c_str()); + } + + return socketId; +} + +my_socket SocketUtil::createAndConnect(int type, const SocketAddress& target_addr, const SocketAddress& src_addr) + _THROW_(SparrowException) { + // Create socket. + my_socket socketId = socket(target_addr.isV6() ? AF_INET6 : AF_INET, type, 0); + if (socketId == INVALID_SOCKET) { + throw SparrowException::create(true, SPW_API_FAILED, "Cannot create socket"); + } + + int dummy = 1; + setsockopt(socketId, SOL_SOCKET, SO_REUSEADDR, (char*)&dummy, sizeof(dummy)); + + if (bind(socketId, src_addr.getSockAddr(), src_addr.getSockAddrLength()) != 0) { + int error = getLastError(); + closesocket(socketId); + throw SparrowException::create(true, SPW_API_FAILED, "Cannot bind socket to address %s: %d", src_addr.print().c_str(), error); + } + + // Connect to Sparrow. + if ( connect( socketId, target_addr.getSockAddr(), target_addr.getSockAddrLength() ) != 0 ) { + int error = getLastError(); + closesocket(socketId); + throw SparrowException::create(true, SPW_API_FAILED, "Cannot connect socket to address %s: %d", target_addr.print().c_str(), error); + } + + return socketId; +} + + +// Gets binding address from its textual representation. +// STATIC +SocketAddress SocketUtil::getAddress(const char* address, uint32_t port) _THROW_(SparrowException) { + uint8_t buffer[16]; + memset(buffer,0,sizeof(buffer)); + IpAddress ipAddress(buffer, sizeof(buffer)); + const bool isIpAddress = address == 0 ? 
false : ipAddress.parse(address, static_cast(strlen(address))); + if ( !ipAddress.isV4() ) { + if ( !v6_ ) { + throw SparrowException::create(false, SPW_API_FAILED, "IPV6 not supported. Cannot use address %s", (address != NULL ? address : "")); + } + struct sockaddr_in6 socketAddress; + memset(&socketAddress, 0, sizeof(socketAddress)); + socketAddress.sin6_family = AF_INET6; + socketAddress.sin6_port = htons(static_cast(port)); + if (address == 0 || strlen(address) == 0) { + // No address. + socketAddress.sin6_addr = in6addr_any; + } else if (isIpAddress) { + // Address given. +#ifdef _WIN32 + int s = sizeof(socketAddress); + const bool ok = WSAStringToAddress(const_cast(address), AF_INET6, 0, reinterpret_cast(&socketAddress), &s) == 0; +#else + const bool ok = inet_pton(AF_INET6, address, &socketAddress.sin6_addr) == 1; +#endif + if (!ok) { + throw SparrowException::create(true, SPW_API_FAILED, "Cannot convert address \"%s\" to IPv6 internal format", address); + } + } else { + // Address given as a name: try DNS resolution. + char buffer[16]; + snprintf(buffer, sizeof(buffer), "%u", port); + struct addrinfo* result; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET6; + const int code = getaddrinfo(address, buffer, &hints, &result); + if (code != 0) { + throw SparrowException::create(false, SPW_API_FAILED, "Cannot get address info for \"%s\", port %u (%s)", + address, port, gai_strerror(code)); + } + memcpy(&socketAddress, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + } + return SocketAddress(socketAddress); + } else { + struct sockaddr_in socketAddress; + memset(&socketAddress, 0, sizeof(socketAddress)); + socketAddress.sin_family = AF_INET; + socketAddress.sin_port = htons(static_cast(port)); + if (address == 0 || strlen(address) == 0) { + // No address. + socketAddress.sin_addr.s_addr = htonl(INADDR_ANY); + } else if (isIpAddress) { + // Address given. + ulong addr = inet_addr(address); + if (addr == static_cast(INADDR_NONE)) { + addr = htonl(INADDR_ANY); + } + socketAddress.sin_addr.s_addr = addr; + } else { + // Address given as a name: try DNS resolution. + char buffer[16]; + snprintf(buffer, sizeof(buffer), "%u", port); + struct addrinfo* result = 0; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + const int code = getaddrinfo(address, buffer, &hints, &result); + if (code != 0) { + throw SparrowException::create(false, SPW_API_FAILED, "Cannot get address info for \"%s\", port %u (%s)", + address, port, gai_strerror(code)); + } + memcpy(&socketAddress, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + } + return SocketAddress(socketAddress); + } +} + +// Gets the address the given socket is bound to. 
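+// The bound address is read with getsockname() into an IPv6-sized structure;
+// if the kernel reports a shorter length, the result is reinterpreted as IPv4.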
+// STATIC +SocketAddress SocketUtil::getAddress(my_socket socket) _THROW_(SparrowException) { + struct sockaddr_in6 socketAddress; + socklen_t length = sizeof(socketAddress); + if (getsockname(socket, reinterpret_cast(&socketAddress), &length) != 0) { + throw SparrowException::create(true, SPW_API_FAILED, "Cannot get bind address of socket"); + } + if (length == sizeof(socketAddress)) { + return SocketAddress(socketAddress); + } else { + return SocketAddress(*reinterpret_cast(&socketAddress)); + } +} + +// STATIC +void SocketUtil::notifyStopSocket() { + if (stopSocket_ != INVALID_SOCKET) { + const char* message = "stop"; + int sent = sendto(stopSocket_, message, static_cast(strlen(message)), 0, + stopSocketAddress_.getSockAddr(), stopSocketAddress_.getSockAddrLength()); + if (sent < 0) { + try { + throw SparrowException::create(true, SPW_API_FAILED, "Cannot notify stop socket"); + } catch(const SparrowException& e) { + e.toLog(); + } + } + } +} + +// STATIC +int SocketUtil::getLastError() { +#ifdef _WIN32 + return WSAGetLastError(); +#else + return errno; +#endif +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +using namespace IvFunctions; + +// Prints an IP address and port number. +Str SocketAddress::print() const { + char buffer[128]; + char* s = buffer; + unsigned int port; + if (v6_) { + port = static_cast(ntohs(raw_.v6_.sin6_port)); + *s++ = '['; + s += IpAddress(reinterpret_cast(&raw_.v6_.sin6_addr), 16).print(s); + *s++ = ']'; + } else { + port = static_cast(ntohs(raw_.v4_.sin_port)); + s += IpAddress(reinterpret_cast(&raw_.v4_.sin_addr), 4).print(s); + } + sprintf(s, ":%u", port); + return Str(buffer, static_cast(strlen(buffer))); +} + +} diff --git a/storage/sparrow/api/socketutil.h b/storage/sparrow/api/socketutil.h new file mode 100644 index 000000000000..c7b2d6dd8da8 --- /dev/null +++ b/storage/sparrow/api/socketutil.h @@ -0,0 +1,138 @@ +/* + Socket utilities. +*/ + +#ifndef _spw_api_socketutil_h +#define _spw_api_socketutil_h + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#include +#endif + +#include "str.h" + +namespace Sparrow +{ +// Address to which a socket is bound. Can be used as a key. +class SocketAddress { +private: + + union RawSocketAddress { + struct sockaddr_in v4_; + struct sockaddr_in6 v6_; + } raw_; + bool v6_; + +public: + static SocketAddress anyAddr_; + +public: + + SocketAddress() : v6_(false) { + memset(&raw_,0,sizeof(raw_)); + } + + SocketAddress(struct sockaddr_in& v4) : v6_(false) { + memset(&raw_,0,sizeof(raw_)); + raw_.v4_ = v4; + } + + SocketAddress(struct sockaddr_in6& v6) : v6_(true) { + memset(&raw_,0,sizeof(raw_)); + raw_.v6_ = v6; + } + + bool isV6() const { + return v6_; + } + + bool isAnyAddr() const { + if ( (v6_ && memcmp( raw_.v6_.sin6_addr.s6_addr, in6addr_any.s6_addr, sizeof(in6addr_any)) == 0 ) + || (!v6_ && raw_.v4_.sin_addr.s_addr == INADDR_ANY ) ) + return true; + return false; + } + + const struct sockaddr_in& getV4() const { + return raw_.v4_; + } + + struct sockaddr_in& getV4() { + return raw_.v4_; + } + + const struct sockaddr_in6& getV6() const { + return raw_.v6_; + } + + struct sockaddr_in6& getV6() { + return raw_.v6_; + } + + const struct sockaddr* getSockAddr() const { + return isV6() ? (const struct sockaddr*)(&getV6()) : (const struct sockaddr*)(&getV4()); + } + + struct sockaddr* getSockAddr() { + return isV6() ? 
(struct sockaddr*)(&getV6()) : (struct sockaddr*)(&getV4()); + } + + int getSockAddrLength() const { + return static_cast(isV6() ? sizeof(getV6()) : sizeof(getV4())); + } + + bool operator == (const SocketAddress& right) const { + return v6_ == right.v6_ && memcmp(&raw_, &right.raw_, sizeof(raw_)) == 0; + } + + uint32_t hash() const { + uint32_t h = 1; + uint32_t i = sizeof(raw_); + const uint8_t* raw = (const uint8_t*)&raw_; + while (i-- > 0) { + h = 31 * h + raw[i]; + } + return h; + } + + Str print() const; + +}; + +class SocketUtil +{ +private: + + static my_socket stopSocket_; + static SocketAddress stopSocketAddress_; + static bool v6_; + +public: + + static void initialize() _THROW_(SparrowException); + + static my_socket create(int type, const SocketAddress& src_addr) _THROW_(SparrowException); + + static my_socket createAndConnect(int type, const SocketAddress& target_addr, const SocketAddress& src_addr) _THROW_(SparrowException); + + static SocketAddress getAddress(const char* address, uint32_t port) _THROW_(SparrowException); + + static SocketAddress getAddress(my_socket socket) _THROW_(SparrowException); + + static my_socket getStopSocket() { + return stopSocket_; + } + + static int getLastError(); + + static void notifyStopSocket(); +}; + +} // namespace sparrow + +#endif // #ifndef _spw_api_socketutil_h diff --git a/storage/sparrow/api/spw_connection.cc b/storage/sparrow/api/spw_connection.cc new file mode 100644 index 000000000000..c8ab3a827af8 --- /dev/null +++ b/storage/sparrow/api/spw_connection.cc @@ -0,0 +1,1070 @@ +#include "memalloc.h" +#include "spw_connection.h" +#include "spw_table.h" +#include "spw_master.h" +#include "spw_sparrowbuffer.h" +#include "socketutil.h" +#include "compress.h" + +#include "my_io.h" + +// When building in standalone, use the files from sub-dir 'mysql' +// else, use the headers from MySQL source project +//#include "mysql/my_aes.h" +#include + +//#include + + +namespace Sparrow +{ + +// FROM PUBLIC INTERFACE +int initialize() +{ + try { + Lock::initializeStatics(); + Cond::initializeStatics(); + + spw_Table::initialize(); + + // Some global socket layer initialization + SocketUtil::initialize(); + } catch ( const SparrowException& e ) { + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// FROM PUBLIC INTERFACE +Connection* createConnect() +{ + return new spw_Connection(); +} + +// FROM PUBLIC INTERFACE +void releaseConnect( const Connection* connect ) +{ + if ( connect != NULL ) { + delete connect; + } +} + +// FROM PUBLIC INTERFACE +const char* errmsg() { + return spwerror.getText(); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection Implementation +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint8_t spw_Connection::TAG[] = { 83, 80, 65, 82, 82, 79, 87 }; // "SPARROW" + +const uint8_t spw_Connection::SPARROW_API_VERSION = 1; + +spw_Connection::spw_Connection() : Thread("Listener"), + lockSckt_(false, "socket lock"), lockRqst_(false, "requests lock"), compressionAlgorithm_(0) +{ + socket_ = INVALID_SOCKET; + counter_ = 0; +} + +spw_Connection::~spw_Connection(void) +{ + disconnectAndResetRqsts( SparrowException( "shutdown" ), true ); +} + +// [PUBLIC] +int spw_Connection::setProperties( const ConnectionProperties& properties, uint32_t compressionAlgo /* =0 */ ) +{ + Guard lockGuard( lockSckt_ ); + try { + if ( compressionAlgo != 0 && compressionAlgo != 1 ) { + throw SparrowException( 
"Invalid compression algorithm" ); + } + compressionAlgorithm_ = compressionAlgo; + properties_ = properties; + } catch ( const SparrowException& e ) { + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] +int spw_Connection::setProperties( const char* host, const char* user, const char* psswd, + uint32_t mysqlPort /*=DEF_MYSQL_PORT*/, uint32_t spwPrt /*=DEF_PORT*/, + const char* srcAddr /*=DEF_SOURCE_ADDR*/, uint32_t srcPort /*=0*/, uint32_t compressionAlgo /*=0*/ ) +{ + Guard lockGuard( lockSckt_ ); + try { + if ( compressionAlgo != 0 && compressionAlgo != 1 ) { + throw SparrowException( "Invalid compression algorithm" ); + } + compressionAlgorithm_ = compressionAlgo; + properties_ = spw_ConnectionProperties( host, user, psswd, mysqlPort, spwPrt, srcAddr, srcPort ); + } catch ( const SparrowException& e ) { + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +bool spw_Connection::isClosed() const +{ + return socket_ == INVALID_SOCKET; +} + + +// [PUBLIC] Create the socket, connect to Sparrow, starts the listening thread, send an authentication message. +int spw_Connection::connect() +{ + { + Guard lockGuard( lockSckt_ ); + try + { + if ( socket_ != INVALID_SOCKET ) { + disconnectAndResetRqsts( SparrowException( "reconnecting" ), false ); + } + + // Get address from string + SocketAddress addr = SocketUtil::getAddress( properties_.host_.c_str(), properties_.port_ ); + SocketAddress srcAddr; + if ( properties_.src_addr_.length() == 0 ) { + srcAddr = SocketUtil::getAddress( NULL, properties_.src_port_ ); + } else if ( properties_.src_addr_.length() != 0 ) { + srcAddr = SocketUtil::getAddress( properties_.src_addr_.c_str(), properties_.src_port_ ); + if ( srcAddr.isV6() != addr.isV6() ) { + throw SparrowException::create( false, SPW_API_FAILED, "cannot connect, target addr '%s' and source addr '%s' are of different types.", + properties_.host_.c_str(), properties_.src_addr_.c_str() ); + } + } + + // Connect to Sparrow server + socket_ = SocketUtil::createAndConnect( SOCK_STREAM, addr, srcAddr ); + + // Initialize the fd_set structure + FD_ZERO(&fdSet_); + FD_SET(socket_, &fdSet_); + my_socket stopSocket = SocketUtil::getStopSocket(); + if (stopSocket != INVALID_SOCKET) { + FD_SET(stopSocket, &fdSet_); + } + + // Start listening thread + start(); + } catch(const SparrowException& e) { + disconnectAndResetRqsts( e, false ); + + spwerror = e; + return e.getErrcode(); + } + } + + try + { + // Authenticate + RequestGuard request = authenticate(); + request->getResponse(); + + } catch(const SparrowException& e) { + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] Complete cleanup. Stop listening thread, close socket and abort all pending requests (i.e. API calls) +void spw_Connection::disconnect() +{ + disconnectAndResetRqsts( SparrowException( "user disconnection" ), true ); +} + + +// Complete cleanup. Stop listening thread, close socket and abort all pending requests (i.e. 
API calls) +void spw_Connection::disconnectAndResetRqsts( const SparrowException& e, bool lock ) +{ + closeSocket( lock ); + resetRqsts( e ); +} + + +// Closes connection socket to Sparrow and ends listenning thread if it's still running +void spw_Connection::closeSocket( bool lock ) +{ + if ( lock ) lockSckt_.lock(); + + //if ( endThread && isRunning() ) { + if ( isRunning() ) { + PRINT_DBUG("[spw_Connection::disconnect] Stopping thread..."); + stop(); + } + + if ( socket_ != INVALID_SOCKET ) { + PRINT_DBUG("[spw_Connection::disconnect] Closing socket..."); + ::shutdown(socket_, SHUT_RDWR); + FD_CLR(socket_, &fdSet_); + closesocket( socket_ ); + socket_ = INVALID_SOCKET; + } + + if ( lock ) lockSckt_.unlock(); +} + + +// Creates a new request object +RequestGuard spw_Connection::createNewRequest() +{ + uint32_t id = Atomic::inc32( &counter_ ); + RequestGuard request( new Request( id ) ); + + { + // Add this new request to our list of pending requests + Guard lockGuard( lockRqst_ ); + requests_.append( request ); + } + + return request; +} + +void spw_Connection::resetRqsts( const SparrowException& e ) +{ + Guard lockGuard( lockRqst_ ); + + PRINT_DBUG("[spw_Connection::disconnect] Reseting %u requests: %s...", requests_.entries(), e.getText() ); + SYSslistIterator iterator(requests_); + while (++iterator) { + iterator.key()->exceptionReceived( e ); + } + requests_.clear(); + PRINT_DBUG("[spw_Connection::disconnect] Disconnected."); +} + + +void spw_Connection::sendHeader( SocketWriter& writer, uint32_t rqstId, uint32_t len, uint32_t comprLen, Action action ) +{ + for (uint32_t i = 0; i < sizeof(TAG); ++i) { + writer << TAG[i]; + } + writer << SPARROW_API_VERSION; + + writer << rqstId << len << compressionAlgorithm_ << comprLen << action; +} + +// Sends a single ByteBuffer +RequestGuard spw_Connection::compressAndSendBuffer(Action action, const ByteBuffer& buffer) +{ + RequestGuard request( createNewRequest() ); + + Guard lockGuard( lockSckt_ ); + if ( socket_ == INVALID_SOCKET ) + throw SparrowException( "Not connected", true, SPW_API_SOCKET_CONN_CLOSED ); + + try + { + SocketWriter writer( socket_ ); + // Compress (eventually) the request buffer and send it + uint32_t length = buffer.position(); + if ( length == 0 ) + { + sendHeader( writer, request->id(), 0, 0, action ); + } + else if ( compressionAlgorithm_ == 0 ) + { + sendHeader( writer, request->id(), length, length, action ); + writer.send( ByteBuffer( buffer.getData(), length ) ); + } + else + { + HeapBuffer compressedBuffer(length); + const uint32_t compressedLength = static_cast(LZJB::compress(buffer.getData(), compressedBuffer.getData(), length, length)); + { + sendHeader( writer, request->id(), length, compressedLength, action ); + if ( compressedLength < length ) { + writer.send( ByteBuffer( compressedBuffer.getData(), compressedLength ) ); + } else { + writer.send( ByteBuffer( buffer.getData(), length ) ); + } + } + } + + // writer.flush is automatically done in its destructor + + } catch ( const SparrowException& e ) { + // Remove request from queue. 
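+    // (the second argument asks getRequest() to drop the entry from requests_;
+    // if the error is a network error, disconnectAndResetRqsts() below also
+    // fails every other request still waiting for a response.)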
+ getRequest( request->id(), true ); + + // If socket error, closeSocket, which implies reseting all pending requests + if ( networkErr( e.getErrcode() ) ) { + disconnectAndResetRqsts( e, false ); + } + throw; + } + + return request; +} + +// Sends a single ByteBuffer list +RequestGuard spw_Connection::compressAndSendBuffer(Action action, const BufferList& buffer) +{ + const SYSvector& buffers( buffer.getBuffers() ); + if ( buffers.length() == 0 ) { + return compressAndSendBuffer( action, ByteBuffer() ); + } + if ( buffers.length() == 1 ) { + return compressAndSendBuffer( action, *buffers[0] ); + } + + RequestGuard request( createNewRequest() ); + + Guard lockGuard( lockSckt_ ); + if ( socket_ == INVALID_SOCKET ) + throw SparrowException( "Not connected", true, SPW_API_SOCKET_CONN_CLOSED ); + + try + { + SocketWriter writer( socket_ ); + + uint32_t length = buffer.getPosition(); // Total size of data to send in bytes + + // If no compression required, then send buffers as they are + if ( compressionAlgorithm_ == 0 ) + { + sendHeader( writer, request->id(), length, length, action ); + for ( uint32_t i=0; i(LZJB::compress(uncompressedBuffer.get().getData(), + compressedBuffer.get().getData(), length, length)); + { + // Write compression header + sendHeader( writer, request->id(), length, compressedLength, action ); + + // Send compressed data if compression decreased buffer size, otherwise send uncompressed data + if ( compressedLength < length ) { + writer.send( ByteBuffer( compressedBuffer.get().getData(), compressedLength ) ); + } else { + writer.send( uncompressedBuffer.get() ); + } + } + } + } catch ( const SparrowException& e ) { + // Remove request from queue. + getRequest( request->id(), true ); + + // If socket error, closeSocket, which implies reseting all pending requests + if ( networkErr( e.getErrcode() ) ) { + disconnectAndResetRqsts( e, false ); + } + throw; + } + + // writer.flush is automatically done in its destructor + + return request; +} + +RequestGuard spw_Connection::authenticate() _THROW_(SparrowException) +{ + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + + { + Guard lockGuard( lockSckt_ ); + buffer << properties_.user_; + + const uint32_t length = properties_.pssw_.length(); + static const char* secretKey = "49#28!86@14\"&"; + char tmp[256]; + const uint32_t aesLength = static_cast(my_aes_get_size(length, my_aes_128_ecb)); + const bool onStack = aesLength <= sizeof(tmp); + AutoPtr passwordGuard(onStack ? tmp : new char[aesLength]); + char* encryptedPassword = onStack ? passwordGuard.release() : passwordGuard.get(); + const int result = my_aes_encrypt( reinterpret_cast(properties_.pssw_.c_str()), static_cast(length), + reinterpret_cast(encryptedPassword), + reinterpret_cast(secretKey), static_cast(strlen(secretKey)), + my_aes_128_ecb, NULL ); + if (result <= 0) { + throw SparrowException::create(false, SPW_API_FAILED, "Cannot encrypt password: error code %d", result); + } + encryptedPassword[result] = '\0'; + ByteBuffer buff( (uint8_t*)encryptedPassword, aesLength ); + buffer << buff.limit() << buff; + } + + return compressAndSendBuffer( AUTH, buffer ); +} + +// Reads incoming data on socket +bool spw_Connection::process() +{ + if ( socket_ == INVALID_SOCKET ) + return false; + + fd_set fdSet = fdSet_; + + my_socket stopSocket = SocketUtil::getStopSocket(); + my_socket maxSocket = stopSocket == INVALID_SOCKET ? 
socket_ : std::max(socket_, stopSocket); + + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + + // Wait for something to arrive on the socket_ + //PRINT_DBUG("[spw_Connection::process] select ..."); + int rc = select(static_cast(maxSocket + 1), &fdSet, 0, 0, &tv); + //PRINT_DBUG("[spw_Connection::process] select %u", rc); + if ( rc == 0) { + //PRINT_DBUG("[spw_Connection::process] timeout"); + return true; + } + else if ( rc < 0 ) { + // Error - closed socket or something + PRINT_DBUG("[spw_Connection::process] SOCKET ERROR! %d", rc); + return false; + } else if (FD_ISSET(socket_, &fdSet)) { + //PRINT_DBUG("[spw_Connection::process] PACKET REC !"); + // Read Header + Request* request = NULL; + uint32_t length, code, compressedLength; + + try + { + { + uint8_t header[24]; + ByteBuffer buffer(header, sizeof(header)); + SocketReader reader(socket_, buffer); + + // Check tag. + for (uint32_t i = 0; i < sizeof(TAG); ++i) { + uint8_t check; + reader >> check; + if (check != TAG[i]) { + throw SparrowException::create(false, SPW_API_FAILED, "Malformed API response"); + } + } + uint8_t version; + reader >> version; + if (version != SPARROW_API_VERSION) { + throw SparrowException::create(false, SPW_API_FAILED, "Unsupported API version: %u", static_cast(version)); + } + + // Read request ID and find corresponding request object in our list + uint32_t id; + reader >> id; + request = getRequest( id, true ); + if ( request == NULL ) { + throw SparrowException::create(false, SPW_API_FAILED, "Received response for unknown request %u", id ); + } + + reader >> length >> code >> compressedLength; + if (length > 100 * 1024 * 1024 || compressedLength > 100 * 1024 * 1024) { + const Str size(Str::fromSize(length)); + throw SparrowException::create(false, SPW_API_FAILED, "Received too much data (%s)", size.c_str()); + } + } + + // Read compressed request data. + try + { + Response* response = new Response( compressedLength, length, code ); + SocketReader reader( socket_, response->getBuffer() ); // Reads whatever data is currently available in socket input buffer + reader.advance( compressedLength ); // Reads data from socket up to compressedLength bytes, blocking if necessary + request->responseReceived( response ); + } catch ( const SparrowException& e ) { + request->exceptionReceived( e ); + throw; + } + } + catch ( const SparrowException& e ) + { + stopping(); + PRINT_ERR("An exception occurred (%s). Disconnecting.", e.getText()); + disconnectAndResetRqsts( e, true ); + return false; + } + } else { + // "stop" socket notification, so stop. 
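This branch is the receiving end of a wake-up channel: a dedicated "stop" descriptor is added to the fd_set so that another thread can interrupt the blocking select() and ask the listener to exit. A minimal POSIX-only sketch of the same select()-plus-stop-descriptor pattern, with hypothetical descriptor names (the real code obtains its stop socket from SocketUtil::getStopSocket()):

    #include <sys/select.h>
    #include <algorithm>

    // dataFd carries protocol traffic; wakeFd is the read end of a pipe that
    // another thread writes a byte to when it wants this loop to stop.
    void listenLoop(int dataFd, int wakeFd) {
        for (;;) {
            fd_set fds;
            FD_ZERO(&fds);
            FD_SET(dataFd, &fds);
            FD_SET(wakeFd, &fds);
            timeval tv;
            tv.tv_sec = 1;          // 1 s timeout, as in the loop above
            tv.tv_usec = 0;
            int rc = select(std::max(dataFd, wakeFd) + 1, &fds, nullptr, nullptr, &tv);
            if (rc < 0) break;      // socket error: leave the loop
            if (rc == 0) continue;  // timeout: go around again
            if (FD_ISSET(wakeFd, &fds)) break;            // stop notification
            if (FD_ISSET(dataFd, &fds)) { /* read and dispatch one frame */ }
        }
    }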
+ stopping(); + PRINT_DBUG("[spw_Connection::process] STOP NOTIF!"); + return false; + } + + //PRINT_DBUG("[spw_Connection::process] return TRUE"); + return true; +} + + +// [PUBLIC] +void spw_Connection::initialize( const spw_Table& table ) _THROW_(SparrowException) +{ + // Downcast to spw_Table type + //const spw_Table& table = *(static_cast(&tbl)); + + HeapBuffer buffer; + + buffer << table.getDbNameStr() << table.getTableNameStr(); + + // Write columns + const Columns& columns = table.getColumns(); + buffer << columns.length(); + for ( uint32_t i=0; igetResponse(); +} + +// [PUBLIC] +Table* spw_Connection::createTable() const { + return new spw_Table(); +} + +// [PUBLIC] +void spw_Connection::releaseTable( const Table* table ) const +{ + if ( table != NULL ) { + delete table; + } +} + +// [PUBLIC] +ColumnNames* spw_Connection::createColumnNames( int size ) { + return new spw_ColumnNames( size ); +} + +// [PUBLIC] +void spw_Connection::releaseColumnNames( const ColumnNames* names ) { + if ( names != NULL ) { + delete names; + } +} + +// [PUBLIC] +SparrowBuffer* spw_Connection::createBuffer( const Table* table, uint32_t capacity /*=UINT_MAX32*/) const { + if ( !table || capacity == 0 ) { + spwerror = SparrowException::create( false, SPW_API_FAILED, "Invalid arg to createBuffer(%p, %u).", table, capacity ); + return NULL; + } + return new spw_SparrowBuffer( table, capacity ); +} + +void spw_Connection::releaseBuffer( const SparrowBuffer* buffer ) const +{ + if ( buffer != NULL ) { + delete buffer; + } +} + +// [PUBLIC] +int spw_Connection::disableCoalescing( uint32_t timeout, bool wait /*=false*/ ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + + buffer << timeout << wait; + + try { + + RequestGuard request = compressAndSendBuffer( DISABLE_COALESCING, buffer ); + request->getResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] +int spw_Connection::disableCoalescing( uint32_t timeout, const char* database, bool wait /*=false*/ ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + + buffer << timeout << Str(database) << wait; + + try { + + RequestGuard request = compressAndSendBuffer( DISABLE_COALESCING_DB, buffer ); + request->getResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] +int spw_Connection::removePartitions( const char* database, const char* table, const uint64_t start, const uint64_t end ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + + buffer << static_cast(strlen(database)) << database + << static_cast(strlen(table)) << table + << start << end; + + try { + + RequestGuard request = compressAndSendBuffer( REMOVE_PARTITIONS, buffer ); + request->getResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] +int spw_Connection::switchPurgeMode( 
uint32_t timeout, const char* database, PurgeMode mode ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + + buffer << timeout << Str(database) << static_cast(mode); + + try { + + RequestGuard request = compressAndSendBuffer( SWITCH_PURGE_MODE, buffer ); + request->getResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + +// [PUBLIC] +Master* spw_Connection::getMasterFile( const char* database, const char* table ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return NULL; + } + + if ( !database || !table ) { + spwerror = SparrowException( "Invalid arg to getMasterFile." ); + return NULL; + } + + uint8_t data[1024]; + ByteBuffer buffer( data, sizeof(data) ); + buffer << static_cast(strlen(database)) << database + << static_cast(strlen(table)) << table; + + spw_Master* masterFile = NULL; + try { + + RequestGuard request = compressAndSendBuffer( GET_MASTER, buffer ); + Response* resp = request->getResponse(); + + // Decode the response + masterFile = new spw_Master(); + resp->getBuffer() >> *masterFile; + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + } + + return masterFile; +} + +// [PUBLIC] +Table* spw_Connection::getTable( const char* database, const char* tablename ) +{ + RefPtr master(static_cast(getMasterFile(database, tablename))); + if ( master == NULL ) return NULL; + + spw_Table* table = new spw_Table(); + if ( !table ) return NULL; + + table->setDatabaseName( database ); + table->setTableName( tablename ); + table->setMaxLifetime( master->getMaxLifetime() ); + table->setCoalescPeriod( master->getCoalescingPeriod() ); + table->setAggregPeriod( master->getAggregPeriod() ); + + // Table columns are non-dropped columns from the master file. 
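getTable() assembles a client-side Table description from the master file returned by the server. For context, a hypothetical caller of the resulting getTable()/releaseTable() pair could look like the sketch below (connection, database and table names are made up, includes depend on the client's setup, and failures are reported through spwerror as elsewhere in this API):

    #include <cstdint>
    // #include "connection.h"  // Sparrow::Connection / Table / Column, per client include path

    void describeTable(Sparrow::Connection* conn) {
        Sparrow::Table* t = conn->getTable("mydb", "sensor_data");   // hypothetical names
        if (t == nullptr) return;                                    // spwerror holds the cause
        for (uint32_t i = 0; i < t->getNbColumns(); ++i) {
            const Sparrow::Column& c = t->getColumn(i);
            (void)c;   // e.g. inspect c.getName(), c.getType()
        }
        conn->releaseTable(t);   // objects from createTable()/getTable() must be released
    }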
+ Columns columns; + for (uint32_t i = 0; i < master->getNbColumns(); ++i) { + const Column& column = master->getColumn(i); + if (!column.isDropped()) { + columns.append(static_cast(column)); + } + } + table->setColumns(columns); + table->setIndexes( master->getIndexes() ); + table->setForeignKeys( master->getForeignKeys() ); + table->setDns( master->getDnsConfiguration() ); + + return table; +} + +// FROM PUBLIC INTERFACE +void spw_Connection::releaseMasterFile( const Master* master ) const +{ + if ( master != NULL ) { + delete master; + } +} + + +// [PUBLIC] +int spw_Connection::insertData( const Table* tbl, const SparrowBuffer* dt ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + if ( !tbl || !dt ) { + spwerror = SparrowException( "Invalid arg to insertData.", true, SPW_API_INVALID_ARG ); + return SPW_API_INVALID_ARG; + } + + const spw_Table& table = *(static_cast(tbl)); + const spw_SparrowBuffer& data = *(static_cast(dt)); + + BufferList buffer( UINT_MAX32, 1024 ); + + RefByteBuffer header( new HeapBuffer() ); + *header << table.getDbNameStr() << table.getTableNameStr(); + *header << data.getRows() << data.getSize(); + + buffer.append( header ); + buffer.append( data.getBuffers() ); + + try { + + RequestGuard request = compressAndSendBuffer( INSERT_DATA, buffer ); + request->getResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + + +// [PUBLIC] +int spw_Connection::insertData( const Table* tbl, const ColumnNames* cols, const SparrowBuffer* dt ) +{ + if ( isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + if ( !tbl || !dt || !cols ) { + spwerror = SparrowException( "Invalid arg to insertData.", true, SPW_API_INVALID_ARG ); + return SPW_API_INVALID_ARG; + } + + const spw_ColumnNames& columns = *(static_cast(cols)); + const spw_Table& table = *(static_cast(tbl)); + const spw_SparrowBuffer& data = *(static_cast(dt)); + + BufferList buffer( UINT_MAX32, 1024 ); + + RefByteBuffer header( new HeapBuffer(20*1024) ); + *header << table.getDbNameStr() << table.getTableNameStr(); + const SYSvector& colNames = columns.getNames(); + *header << colNames.length(); + for (uint32_t i=0; igetResponse(); + + } catch ( const SparrowException& e ) { + PRINT_ERR( "%s", e.getText() ); + spwerror = e; + return e.getErrcode(); + } + + return 0; +} + + +RequestGuard spw_Connection::getRequest( uint32_t id, bool remove ) +{ + RequestGuard request; + + Guard lockGuard( lockRqst_ ); + SYSslistIterator iterator(requests_); + while (++iterator) { + if ( iterator.key()->id() == id ) { + request = iterator.key(); + if ( remove ) + iterator.remove(); + break; + } + } + + return request; +} + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// spw_ConnectionProperties +////////////////////////////////////////////////////////////////////////////////////////////////////// + +spw_ConnectionProperties::spw_ConnectionProperties(const ConnectionProperties& prop) + : host_(prop.getHost()), user_(prop.getUser()), pssw_(prop.getPsswd()), + src_addr_(prop.getSrcAddr()), src_port_(prop.getSrcPort()), port_(prop.getPort()), + mysql_port_(prop.getMySQLPort()) + +{ + completeWithDefaultValues(); +} + +spw_ConnectionProperties::spw_ConnectionProperties( const Str& host, const Str& 
user, const Str& pssw + , const Str& src_addr, uint32_t src_port /*=0*/ + , uint32_t mysql_port /* = DEF_MYSQL_PORT */ + , uint32_t port /* = DEF_PORT */ ) + : host_(host), user_(user), pssw_(pssw), src_addr_(src_addr), src_port_(src_port), port_(port), mysql_port_(mysql_port) +{ + completeWithDefaultValues(); +} + +spw_ConnectionProperties::spw_ConnectionProperties( const char* host, const char* user, const char* pssw, + uint32_t mysql_port /*=DEF_MYSQL_PORT*/, uint32_t spw_port /*=DEF_PORT*/, + const char* src_addr /*=DEF_SOURCE_ADDR*/, uint32_t src_port /*=0*/ ) + : host_(host), user_(user), pssw_(pssw), src_addr_(src_addr), src_port_(src_port), port_(spw_port), mysql_port_(mysql_port) +{ + completeWithDefaultValues(); +} + +const ConnectionProperties& spw_ConnectionProperties::operator = ( const ConnectionProperties& prop ){ + if ( &prop == this ) + return *this; + + host_ = prop.getHost(); + user_ = prop.getUser(); + pssw_ = prop.getPsswd(); + src_addr_ = prop.getSrcAddr(); + src_port_ = prop.getSrcPort(); + port_ = prop.getPort(); + mysql_port_ = prop.getMySQLPort(); + + completeWithDefaultValues(); + + return *this; +} + +void spw_ConnectionProperties::completeWithDefaultValues() { + + if ( host_.length() == 0 ) { + host_ = DEF_HOSTNAME; + } + if ( user_.length() == 0 ) { + user_ = DEF_USERNAME; + } + if ( src_addr_.length() == 0 ) { + src_addr_ = DEF_SOURCE_ADDR; + } + if ( port_ == 0 ) + port_ = DEF_PORT; + if ( mysql_port_ == 0 ) + mysql_port_ = DEF_MYSQL_PORT; +} + +Str spw_ConnectionProperties::getString() const +{ + char desc[1024]; + snprintf( desc, sizeof(desc), "%s (%u), user '%s', src addr. %s, spw port %u", + host_.c_str(), mysql_port_, user_.c_str(), src_addr_.c_str(), port_); + return Str( desc ); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Response +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void Response::decompress() _THROW_(SparrowException) +{ + // Decompress response if required. 
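The test below relies on an invariant of the sender (see compressAndSendBuffer above): compressed bytes are transmitted only when LZJB actually shrank the payload, so a buffer shorter than the announced uncompressed length is the signal that decompression is needed. A simplified sketch of that sender-side decision, with a hypothetical Frame type:

    #include <cstdint>

    struct Frame {                // hypothetical: what ends up on the wire
        const uint8_t* data;
        uint32_t sentLen;         // bytes actually sent
        uint32_t rawLen;          // announced uncompressed length
    };

    Frame chooseSendPayload(const uint8_t* raw, uint32_t rawLen,
                            const uint8_t* compressed, uint32_t compressedLen) {
        if (compressedLen < rawLen)
            return Frame{compressed, compressedLen, rawLen};  // receiver sees sentLen < rawLen: decompress
        return Frame{raw, rawLen, rawLen};                    // sent as-is
    }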
+ if (buffer_.limit() < length_) { + if (compressionAlgorithm_ != 1) { + throw SparrowException::create(false, SPW_API_FAILED, "Only lzjb compression is supported"); + } + HeapBuffer buffer(length_); + const int result = LZJB::decompress(buffer_.getData(), buffer.getData(), buffer_.limit(), length_); + if (result != 0) { + throw SparrowException::create(false, SPW_API_FAILED, "Cannot decompress data"); + } + buffer_ = buffer; + } +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Request +////////////////////////////////////////////////////////////////////////////////////////////////////// + +inline Request::Request( uint32_t id ) : id_(id), sema_(""), response_(NULL), exception_(NULL) +{ +} + +inline Request::~Request() +{ + if ( exception_ != NULL ) { + delete exception_; + } + if ( response_ != NULL ) { + delete response_; + } +} + + +inline void Request::responseReceived( Response* response ) +{ + response_ = response; + sema_.post(); +} + +inline void Request::exceptionReceived( const SparrowException& e ) +{ + if ( exception_ == NULL ) { + exception_ = new SparrowException( e ); + //PRINT_DBUG("[Request::exceptionReceived] EXCEPT %s", exception_->getText()); + } + sema_.post(); +} + +inline void Request::check() _THROW_(SparrowException) +{ + SparrowException e; + if ( response_->getException( e ) ) { + //PRINT_DBUG("[Request::check] exception in packet: %d, %s", e.getErrcode(), e.getText()); + throw e; + } +} + +Response* Request::getResponse() _THROW_(SparrowException) +{ + //PRINT_DBUG("[Request::getResponse] waiting..."); + sema_.wait(); + if ( exception_ == NULL ) { + //PRINT_DBUG("[Request::getResponse] notif response!"); + SPW_dbgASSERT( response_ ); + response_->decompress(); + check(); + } else { + //PRINT_DBUG("[Request::getResponse] notif EXCEPT!"); + throw SparrowException( *exception_ ); + } + return response_; +} + +Str Request::getString() const +{ + char desc[1024]; + sprintf( desc, "%u", id_ ); + if ( response_ != NULL ) { + strcat( desc, ", response" ); + } + if ( exception_ != NULL ) { + strcat( desc, ", except " ); + strcat( desc, exception_->getText() ); + } + return Str( desc ); +} + +} // namespace Saprrow diff --git a/storage/sparrow/api/spw_connection.h b/storage/sparrow/api/spw_connection.h new file mode 100644 index 000000000000..e8f0f7c739e8 --- /dev/null +++ b/storage/sparrow/api/spw_connection.h @@ -0,0 +1,274 @@ +#ifndef _spw_api_impl_connection_h +#define _spw_api_impl_connection_h + +#include "include/connection.h" +#include "include/exception.h" +#include "sema.h" +#include "thread.h" +#include "bufferlist.h" + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#include +#endif + + + +#define SPARROW_SOCKET_RECEIVE_BUFFER_SIZE (64*1024) +#define SPARROW_SOCKET_SEND_BUFFER_SIZE (4*1024*1024) + +namespace Sparrow +{ + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Response +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Response +{ +private: + + // Response buffer. May be compressed initially. + HeapBuffer buffer_; + + // Length of decompressed response. + uint32_t length_; + + // Compression algorithm. 
+ uint32_t compressionAlgorithm_; + +public: + Response(uint32_t compressedLength, uint32_t length, uint32_t compressionAlgorithm) : + buffer_(compressedLength), length_(length), compressionAlgorithm_(compressionAlgorithm) + {} + + HeapBuffer& getBuffer() { return buffer_; } + + bool getException(SparrowException& e) { + if ( length_ == 0 ) + return false; + Str msg; + unsigned int err_code = SPW_API_FAILED; + buffer_ >> msg; + if ( msg.length() == 0 ) + return false; + if ( buffer_.position() != buffer_.limit() ) { + buffer_ >> err_code; + if ( err_code == 0 ) + return false; + } + e = SparrowException( msg.c_str(), (int32_t)err_code ); + return true; + } + + void decompress() _THROW_(SparrowException); +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Request +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Connection; +class Request : public RefCounted +{ + friend class Connection; + +private: + uint32_t id_; + Sema sema_; + Response* response_; + + SparrowException* exception_; + +protected: + void check() _THROW_(SparrowException); + +public: + Request(uint32_t id); + ~Request(); + + uint32_t id() const { return id_; } + + // A response has been received for this request. + void responseReceived(Response* response); + + // An I/O error was received for this request. + void exceptionReceived(const SparrowException&); + + // Waits indefinitely for a response. + Response* getResponse() _THROW_(SparrowException); + + Str getString() const; +}; + +typedef RefPtr RequestGuard; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection Properties +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_ConnectionProperties : public ConnectionProperties { +private: + void completeWithDefaultValues(); + +public: + Str host_; + Str user_; + Str pssw_; + Str src_addr_; + uint32_t src_port_; + uint32_t port_; + uint32_t mysql_port_; + +public: + spw_ConnectionProperties() : src_port_(0), port_(0), mysql_port_(0) {} + spw_ConnectionProperties(const ConnectionProperties&); + spw_ConnectionProperties(const Str& host, const Str& user, const Str& pssw, + const Str& src_addr, uint32_t srcPort=0, uint32_t mysql_port=DEF_MYSQL_PORT, + uint32_t port=DEF_PORT); + spw_ConnectionProperties(const char* host, const char* user, const char* psswd, + uint32_t mysqlPort=DEF_MYSQL_PORT, uint32_t spwPrt=DEF_PORT, + const char* srcAddr=DEF_SOURCE_ADDR, uint32_t srcPort=0); + + const ConnectionProperties& operator = (const ConnectionProperties&); + + const char* getHost() const override { return host_.c_str(); } + const char* getUser() const override { return user_.c_str(); } + const char* getPsswd() const override { return pssw_.c_str(); } + const char* getSrcAddr() const override { return src_addr_.c_str(); } + uint32_t getSrcPort() const override { return src_port_; } + uint32_t getPort() const override { return port_; } + uint32_t getMySQLPort() const override { return mysql_port_; } + + Str getString() const; +}; + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_Table; +class spw_Connection : public Thread, public Connection +{ + friend class spw_Table; + +public: + enum 
Action { + AUTH, + INIT, + INSERT_DATA, + GET_MASTER, + DISABLE_COALESCING, + REMOVE_PARTITIONS, + DISABLE_COALESCING_DB, + INSERT_DATA_EX, + SWITCH_PURGE_MODE + }; + +private: + + static const uint8_t TAG[]; + + static const uint8_t SPARROW_API_VERSION; + + // Protect access to the socket, fdSet_ , properties_ and the listening thread + Lock lockSckt_; + + // Protect access to the request list, requests_ + Lock lockRqst_; + + // Socket to communicate with Sparrow + my_socket socket_; + spw_ConnectionProperties properties_; + uint32_t compressionAlgorithm_; + + // Listening thread + fd_set fdSet_; + + // Array of currently active requests + SYSslist requests_; + RequestGuard getRequest(uint32_t id, bool remove); + + + // Request counter (used to generate Request::id_) + uint32_t counter_; + + RequestGuard createNewRequest(); + void resetRqsts(const SparrowException& e); + + void sendHeader(SocketWriter& writer, uint32_t rqstId, uint32_t len, uint32_t comprLen, Action action); + RequestGuard compressAndSendBuffer(Action action, const ByteBuffer& buffer); + RequestGuard compressAndSendBuffer(Action action, const BufferList& buffer); + + RequestGuard authenticate() _THROW_(SparrowException); + + void disconnectAndResetRqsts(const SparrowException&, bool lock); + void closeSocket(bool lock); + +protected: + + bool process() override; + + void notifyStop() override { + // I'm the only tread, so no-one to notify + } + + bool deleteAfterExit() override { + return false; + } + + void initialize(const spw_Table&) _THROW_(SparrowException); + + +public: + spw_Connection(); + ~spw_Connection(); + + int setProperties(const ConnectionProperties& properties, uint32_t compressionAlgo=0) override; + int setProperties(const char* host, const char* user, const char* psswd, + uint32_t mysqlPort=DEF_MYSQL_PORT, uint32_t spwPrt=DEF_PORT, const char* srcAddr=DEF_SOURCE_ADDR, + uint32_t srcPort=0, uint32_t compressionAlgo=0) override; + const ConnectionProperties* getProperties() const override { return &properties_;} + + int connect() override; + void disconnect() override; + + bool isClosed() const override; + + Table* createTable() const override; + Table* getTable(const char* database, const char* table) override; + void releaseTable(const Table*) const override; + + SparrowBuffer* createBuffer(const Table* table, uint32_t capacity=UINT_MAX32) const override; + void releaseBuffer(const SparrowBuffer*) const override; + + ColumnNames* createColumnNames(int size=0) override; + void releaseColumnNames(const ColumnNames*) override; + int insertData(const Table* table, const SparrowBuffer* buffers) override; + int insertData(const Table* table, const ColumnNames* columns, const SparrowBuffer* buffers) override; + + int disableCoalescing(uint32_t timeout, bool wait=false) override; + int disableCoalescing(uint32_t timeout, const char* database, bool wait=false) override; + int removePartitions(const char* database, const char* table, const uint64_t start, const uint64_t end) override; + int switchPurgeMode(uint32_t timeout, const char* database, PurgeMode mode) override; + + Master* getMasterFile(const char* database, const char* table) override; + void releaseMasterFile(const Master*) const override; + + unsigned int getListeningThrdId() const override { return threadId_; } +}; + +} // namespace Sparrow + + + +#endif // #ifndef _spw_api_impl_connection_h diff --git a/storage/sparrow/api/spw_master.cc b/storage/sparrow/api/spw_master.cc new file mode 100644 index 000000000000..033140fbd11f --- /dev/null +++ 
b/storage/sparrow/api/spw_master.cc @@ -0,0 +1,71 @@ +#include "memalloc.h" +#include "spw_master.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Master +////////////////////////////////////////////////////////////////////////////////////////////////////// + +spw_Master::spw_Master() + : size_(0), version_(0), maxLifetime_(0), aggregationPeriod_(0), autoInc_(0), + serial_(0), timeCreated_(0), timeUpdated_(0), dataSize_(0), indexSize_(0), + records_(0), indexAlterSerial_(0), indexAlterElapsed_(0), coalescingPeriod_(0), + defaultWhere_(0), stringOptimization_(0) +{} + +// Deserialization. +ByteBuffer& operator >> (ByteBuffer& buffer, spw_Master& master) { + buffer >> master.size_; + buffer >> master.version_; + buffer >> master.columns_ >> master.indexes_ >> master.indexMappings_ + >> master.foreignKeys_ >> master.dnsConfiguration_; + buffer >> master.maxLifetime_; + buffer >> master.aggregationPeriod_ >> master.defaultWhere_ >> master.stringOptimization_; + buffer >> master.autoInc_; + buffer >> master.serial_ >> master.timeCreated_ >> master.timeUpdated_ + >> master.dataSize_ >> master.indexSize_ >> master.records_; + buffer >> master.indexAlterSerial_ >> master.indexAlterElapsed_; + buffer >> master.indexAlterations_ >> master.coalescingPeriod_; + + uint32_t length; + buffer >> length; + for (uint32_t i = 0; i < length; ++i) { + spw_PersistentPartition* partition = new spw_PersistentPartition(); + buffer >> *partition; + + // Older versions may contain empty partitions. + if (partition->getRecords() > 0) { + master.partitions_.append(partition); + } else { + delete partition; + } + } + + return buffer; +} + +// Serialization +ByteBuffer& operator << (ByteBuffer& buffer, const spw_Master& master) { + buffer << master.version_; + buffer << master.columns_ << master.indexes_ << master.indexMappings_ + << master.foreignKeys_ << master.dnsConfiguration_; + buffer << master.maxLifetime_ + << master.aggregationPeriod_ << master.defaultWhere_ << master.stringOptimization_ + << master.autoInc_ << master.serial_ << master.timeCreated_ + << master.timeUpdated_ << master.dataSize_ << master.indexSize_ << master.records_ + << master.indexAlterSerial_ << master.indexAlterElapsed_ << master.indexAlterations_ + << master.coalescingPeriod_; + + const Partitions& partitions = master.partitions_; + const uint32_t length = partitions.length(); + buffer << length; + for ( uint32_t i=0; i(partition); + } + + return buffer; +} + +} diff --git a/storage/sparrow/api/spw_master.h b/storage/sparrow/api/spw_master.h new file mode 100644 index 000000000000..e718fae6af7b --- /dev/null +++ b/storage/sparrow/api/spw_master.h @@ -0,0 +1,184 @@ +#ifndef _spw_api_impl_master_h_ +#define _spw_api_impl_master_h_ + +#include "include/master.h" +#include "spw_types.h" + + +namespace Sparrow +{ + +typedef SYSvector IndexMappings; +typedef SYSvector ActiveIndexes; + +class spw_Master : public Master, public RefCounted +{ + friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_Master& master); + friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_Master& master); + +private: + + // Size of the master file, in bytes. + uint64_t size_; + + // Master file version + uint32_t version_; + + // Table columns + Columns columns_; + + // Indexes on the table. Contains also dropped indexes. + Indexes indexes_; + + // Active indexes on the table. Does not contain dropped indexes. 
+ ActiveIndexes activeIndexes_; + + // Index mappings: relations between Sparrow indexes and MySQL indexes. + IndexMappings indexMappings_; + + // Foreign keys. + ForeignKeys foreignKeys_; + + // DNS configuration. + DnsConfiguration dnsConfiguration_; + + // Maximum lifetime of data in this table, in milliseconds. + uint64_t maxLifetime_; + + // Aggregation period, in seconds, of data in this table. If 0, there is no aggregation. + uint32_t aggregationPeriod_; + + // Current value of the auto-incremental column. + int64_t autoInc_; + + // Current partition serial number. + uint64_t serial_; + + // Creation date, in milliseconds since epoch. + uint64_t timeCreated_; + + // Update date, in milliseconds since epoch. + uint64_t timeUpdated_; + + // Total size of data files, in bytes. + uint64_t dataSize_; + + // Total size of index files, in bytes. + uint64_t indexSize_; + + // Total number of records (rows). + uint64_t records_; + + // Current index alter serial number. + uint32_t indexAlterSerial_; + + // Index alter duration. + uint64_t indexAlterElapsed_; + + // Online index modifications. + Alterations indexAlterations_; + + // List of partitions. + Partitions partitions_; + + // Coalescing period, in milliseconds. + uint64_t coalescingPeriod_; + + // Default where period, in milliseconds. + uint64_t defaultWhere_; + + // String optimizatino size, in bytes. + uint64_t stringOptimization_; + +public: + // Deserialization constructor. + spw_Master(); + + uint64_t getSize() const override { return size_; } + uint32_t getVersion() const override { return version_; } + uint64_t getMaxLifetime() const override { return maxLifetime_; } + uint32_t getAggregPeriod() const override { return aggregationPeriod_; } + uint64_t getCoalescingPeriod() const override { return coalescingPeriod_; } + uint64_t getDefaultWhere() const override { return defaultWhere_; } + uint64_t getStringOptimization() const override { return stringOptimization_; } + uint64_t getSerial() const override { return serial_; } + uint64_t getTimeCreated() const override { return timeCreated_; } + uint64_t getTimeUpdated() const override { return timeUpdated_; } + uint64_t getDataSize() const override { return dataSize_; } + uint64_t getIndexSize() const override { return indexSize_; } + uint64_t getRecords() const override { return records_; } + uint32_t getIndexAlterSerial() const override { return indexAlterSerial_; } + + + const Columns& getColumns() const { return columns_; } + uint32_t getNbColumns() const override { return columns_.length(); } + const Column& getColumn(uint32_t index) const override { + SPW_ASSERT(index < columns_.length()); + return columns_[index]; + } + + const Indexes& getIndexes() const { return indexes_; } + uint32_t getNbIndexes() const override { return indexes_.length(); } + const Index& getIndex(uint32_t index) const override { + SPW_ASSERT(index < indexes_.length()); + return indexes_[index]; + } + + const ActiveIndexes& getActiveIndexes() const { return activeIndexes_; } + uint32_t getNbActiveIndexes() const override { return activeIndexes_.length(); } + uint32_t getActiveIndexes(uint32_t index) const override { + SPW_ASSERT(index < activeIndexes_.length()); + return activeIndexes_[index]; + } + + const IndexMappings& getIndexMappings() const { return indexMappings_; } + uint32_t getNbIndexMappings() const override { return indexMappings_.length(); } + uint32_t getIndexMapping(uint32_t index) const override { + SPW_ASSERT(index < indexMappings_.length()); + return indexMappings_[index]; + } + + 
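The accessors in this class are what a metadata consumer drives after calling Connection::getMasterFile(). A minimal, hypothetical walk over a Master object (names and error handling are illustrative only; the object must be handed back via releaseMasterFile()):

    #include <cstdint>

    void dumpMaster(Sparrow::Connection* conn) {
        Sparrow::Master* m = conn->getMasterFile("mydb", "sensor_data");  // hypothetical names
        if (m == nullptr) return;                                         // spwerror holds the cause
        for (uint32_t i = 0; i < m->getNbColumns(); ++i)
            (void)m->getColumn(i);          // name, type, dropped flag, ...
        for (uint32_t i = 0; i < m->getNbIndexMappings(); ++i)
            (void)m->getIndexMapping(i);    // Sparrow-to-MySQL index mapping
        conn->releaseMasterFile(m);
    }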
const ForeignKeys& getForeignKeys() const { return foreignKeys_; } + uint32_t getNbFK() const override { return foreignKeys_.length(); } + const ForeignKey& getFK(uint32_t index) const override { + SPW_ASSERT(index < foreignKeys_.length()); + return foreignKeys_[index]; + } + + const DnsConfiguration& getDnsConfiguration() const { return dnsConfiguration_; } + uint32_t getNbDnsEntries() const override { return dnsConfiguration_.length(); } + uint32_t getDnsEntry(uint32_t index) const override { + SPW_ASSERT(index < dnsConfiguration_.length()); + return dnsConfiguration_[index].getId(); + } + uint32_t getNbDnsServers(uint32_t index) const override { + SPW_ASSERT(index < dnsConfiguration_.length()); + return dnsConfiguration_[index].getServers().length(); + } + const DnsServer& getDnsServer(uint32_t index, uint32_t index2) const override { + SPW_ASSERT(index < dnsConfiguration_.length()); + SPW_ASSERT(index < dnsConfiguration_[index].getServers().length()); + return dnsConfiguration_[index].getServers()[index2]; + } + + const Alterations& getIndexAlterations() const { return indexAlterations_; } + uint32_t getNbIndexAlterations() const override { return indexAlterations_.length(); } + const Alteration& getIndexAlteration(uint32_t index) const override { + SPW_ASSERT(index < indexAlterations_.length()); + return indexAlterations_[index]; + } + + const Partitions& getPartitions() const { return partitions_; } + uint32_t getNbPartitions() const override { return partitions_.length(); } + const Partition& getPartition(uint32_t index) const override { + SPW_ASSERT(index < partitions_.length()); + return *partitions_[index]; + } + +}; + +typedef RefPtr MasterGuard; + +} // namespace Sparrow + +#endif // #define _spw_api_impl_master_h_ diff --git a/storage/sparrow/api/spw_sparrowbuffer.cc b/storage/sparrow/api/spw_sparrowbuffer.cc new file mode 100644 index 000000000000..f905e7e55348 --- /dev/null +++ b/storage/sparrow/api/spw_sparrowbuffer.cc @@ -0,0 +1,234 @@ +#include "memalloc.h" +#include "spw_sparrowbuffer.h" +#include "spw_table.h" + +#include + +#define SPW_TRY try { +#define SPW_CATCH } catch ( const SparrowException& e ) { \ + PRINT_DBUG( e.getText() ); \ + spwerror = e; \ + return e.getErrcode(); \ + } + + +namespace Sparrow +{ + +#define BUFFER_SIZE 65536 + +spw_SparrowBuffer::spw_SparrowBuffer(const Table* table, uint32_t capacity) + : buffer_( capacity, BUFFER_SIZE ), table_(table), cursor_(0) +{ + SPW_ASSERT(table); + + clear(); + + const spw_Table* tbl = static_cast(table); + columns_ = tbl->getColumns(); +} + +spw_SparrowBuffer::~spw_SparrowBuffer(void) +{ +} + +void spw_SparrowBuffer::clear() +{ + buffer_.clear(); + rows_ = 0; + timestamp_ = 0; + cursor_ = 0; +} + +// throws an exception if input data type is not compatible with column +void spw_SparrowBuffer::checkCompatibility( int column, ColumnType inType ) _THROW_(SparrowException) +{ + if ( column != (int)(cursor_++%columns_.getCount())) { + throw SparrowException::create(false, SPW_API_COLINDX_OOB, "Bad insertion sequence in table %s: inserted value for column %u" + ", expected value for column %u", (table_ != NULL ? table_->getTableName() : "???"), column, ((cursor_-1)%columns_.getCount())); + } + if ( column < 0 || static_cast(column) > columns_.getCount() ) { + throw SparrowException::create(false, SPW_API_COLINDX_OOB, "Column index %u is out of bounds for table %s", + column, (table_ != NULL ? 
table_->getTableName() : "???")); + } + const spw_Column& col = columns_[column]; + bool compatible = true; + if ( col.getType() == COL_TIMESTAMP ) { + if ( inType != COL_LONG ) { + compatible = false; + } + } else if ( col.getType() != inType ) { + compatible = false; + } + if ( !compatible ) { + throw SparrowException::create(false, SPW_API_INCOMPATIBLE_TYPES, "Cannot insert %s data in %s.%s", + getType(inType), (table_ != NULL ? table_->getTableName() : "???"), col.getName()); + } +} + +int spw_SparrowBuffer::addNull(int column) +{ + SPW_TRY + cursor_++; + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)1 ); + } else { + throw SparrowException::create(false, SPW_API_COL_NOT_NULLABLE, "Column \"%s\" cannot contain NULL.", columns_[column].getName()); + } + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addBool(int column, bool value) +{ + SPW_TRY + checkCompatibility( column, COL_BYTE ); + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.put( (uint8_t)(value ? 1 : 0) ); + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addByte(int column, uint8_t value) +{ + SPW_TRY + checkCompatibility( column, COL_BYTE ); + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.put( value ); + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addShort(int column, uint16_t value) +{ + SPW_TRY + checkCompatibility( column, COL_SHORT ); + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.putShort( value ); + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addInt(int column, uint32_t value) +{ + SPW_TRY + checkCompatibility( column, COL_INT ); + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.putInt( value ); + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addLong(int column, uint64_t value) +{ + SPW_TRY + checkCompatibility( column, COL_LONG ); + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.putLong( value ); + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addDouble(int column, double value) +{ + SPW_TRY + checkCompatibility( column, COL_DOUBLE ); + if (std::isfinite(value)) { + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.putDouble( value ); + } else { + if ( columns_[column].isFlagSet(COL_NULLABLE) ) { + buffer_.put( (uint8_t)1 ); + } + buffer_.putDouble(0); + } + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addString(int column, const char* value) +{ + SPW_TRY + checkCompatibility( column, COL_STRING ); + bool nullable = columns_[column].isFlagSet(COL_NULLABLE); + if ( value == NULL ) { + if ( nullable ) { + buffer_.put( (uint8_t)1 ); + } else { + throw SparrowException::create(false, SPW_API_COL_NOT_NULLABLE, "Column \"%s\" cannot contain NULL.", columns_[column].getName()); + } + } else { + if ( nullable ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.put( value ); + } + SPW_CATCH + return 0; +} + +int spw_SparrowBuffer::addBlob(int column, const uint8_t* value, uint32_t length) +{ + SPW_TRY + checkCompatibility( column, COL_BLOB ); + bool nullable = columns_[column].isFlagSet(COL_NULLABLE); + if ( length == 0 && nullable ) { + buffer_.put( (uint8_t)1 ); + } else { + if ( nullable ) { + buffer_.put( (uint8_t)0 ); + } + buffer_.putInt( length ); + buffer_.put( ByteBuffer( value, length ) ); + } + SPW_CATCH + return 0; +} + +// If it's a buffer full error, it is caught and replaced by an 
error code +int spw_SparrowBuffer::addRow( const SparrowRow& row, void* dummy /* =NULL */ ) +{ + int res = 0; + const bool wasEmpty = timestamp_ == 0; + buffer_.mark(); + if ( (res=row.decode( this, dummy )) != 0 ) { + buffer_.reset(); + return res; + } + rows_++; + if (wasEmpty) { + timestamp_ = time(0); + } + + return 0; + + /*try { + row.decode( this, table_, dummy ); + rows_++; + if (wasEmpty) { + timestamp_ = time(0); + } + } catch ( const SparrowException& e ) { + buffer_.reset(); + if ( e.getErrcode() != SPW_API_BUFFER_FULL ) { + throw e; + } else { + return false; + } + } + return true;*/ +} + +} diff --git a/storage/sparrow/api/spw_sparrowbuffer.h b/storage/sparrow/api/spw_sparrowbuffer.h new file mode 100644 index 000000000000..42e794b12524 --- /dev/null +++ b/storage/sparrow/api/spw_sparrowbuffer.h @@ -0,0 +1,60 @@ +#ifndef _spw_api_impl_sparrowbuffer_h_ +#define _spw_api_impl_sparrowbuffer_h_ + +//#include "include/sparrowbuffer.h" +#include "include/exceptwrapper.h" +#include "spw_types.h" +#include "bufferlist.h" + +namespace Sparrow +{ + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// spw_SparrowBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_SparrowBuffer : public SparrowBuffer +{ +private: + BufferList buffer_; + Columns columns_; + uint32_t rows_; + uint64_t timestamp_; // Number of seconds since 01/01/1970 + const Table* table_; // Only to pass column description to SparrowRow::decode() + uint32_t cursor_; + + void checkCompatibility(int column, ColumnType inType) _THROW_(SparrowException); + +public: + spw_SparrowBuffer(const Table* table, uint32_t capacity); + ~spw_SparrowBuffer(); + + // Those methods pass along any SparrowException from BufferList methods + int addNull(int column) override; + int addBool(int column, bool value) override; + int addByte(int column, uint8_t value) override; + int addShort(int column, uint16_t value) override; + int addInt(int column, uint32_t value) override; + int addLong(int column, uint64_t value) override; + int addDouble(int column, double value) override; + int addString(int column, const char* value) override; + int addBlob(int column, const uint8_t* value, uint32_t length) override; + + void clear() override; + bool isEmpty() const override { return buffer_.getPosition() == 0; } + uint32_t getSize() const override { return buffer_.getPosition(); } + uint32_t getRows() const override { return rows_; } + uint64_t getTimestamp() const override { return timestamp_; } + bool hasExpired(uint32_t delay) const override { + time_t now = time(0); + return !isEmpty() && timestamp_ + delay < static_cast(now); + } + + int addRow(const SparrowRow& row, void* dummy=NULL) override; + + const SYSvector& getBuffers() const { return buffer_.getBuffers(); } +}; + +} // namespace Sparrow + +#endif // #define _spw_api_impl_sparrowbuffer_h_ diff --git a/storage/sparrow/api/spw_table.cc b/storage/sparrow/api/spw_table.cc new file mode 100644 index 000000000000..993beb695f5a --- /dev/null +++ b/storage/sparrow/api/spw_table.cc @@ -0,0 +1,209 @@ +#include "memalloc.h" +#include "spw_table.h" +#include "spw_connection.h" + +// MySQL C connector +#include + + +namespace Sparrow +{ + +InitMySQLib spw_Table::initMySQL_; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// spw_Table 
+////////////////////////////////////////////////////////////////////////////////////////////////////// +spw_Table::spw_Table() : maxLifetime_(0), coalescingPeriod_(0), aggregationPeriod_(0), defaultWhere_(0), stringOptimization_(0) +{} + +spw_Table::spw_Table(const Str& database, const Str& table) + : maxLifetime_(0), coalescingPeriod_(0), aggregationPeriod_(0), defaultWhere_(0), stringOptimization_(0) +{ + if ( database.length() == 0 ) + throw SparrowException::create(false, SPW_API_FAILED, "Database name cannot be empty"); + databaseName_ = database; + databaseName_.toLower(); + + if ( table.length() == 0 ) + throw SparrowException::create(false, SPW_API_FAILED, "spw_Table name cannot be empty"); + tableName_ = table; + tableName_.toLower(); +} + +spw_Table::spw_Table(const Str& database, const Str& table, + const Columns& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, + const DnsConfiguration& dns, uint64_t maxLifetime, uint64_t coalescingPeriod, + uint32_t aggregationPeriod, uint64_t defaultWhere, uint64_t stringOptimization) + : columns_(columns), indexes_(indexes), foreignKeys_(foreignKeys), dns_(dns), + maxLifetime_(maxLifetime), coalescingPeriod_(coalescingPeriod), aggregationPeriod_(aggregationPeriod), + defaultWhere_(defaultWhere), stringOptimization_(stringOptimization) +{ + if ( database.length() == 0 ) + throw SparrowException::create(false, SPW_API_FAILED, "Database name cannot be empty"); + databaseName_ = database; + databaseName_.toLower(); + + if ( table.length() == 0 ) + throw SparrowException::create(false, SPW_API_FAILED, "spw_Table name cannot be empty"); + tableName_ = table; + tableName_.toLower(); + + if ( columns.length() == 0 ) { + throw SparrowException::create(false, SPW_API_FAILED, "No column defined."); + } + + // Check the first column is a NOT NULL timestamp. + if ( columns[0].getType() != COL_TIMESTAMP + || columns[0].isFlagSet(COL_NULLABLE) ) { + throw SparrowException::create(false, SPW_API_FAILED, "The first column must be a NOT NULL timestamp."); + } + + // Checks if indexes reference the table columns. + for ( uint32_t i=0; i= columns.length() ) { + throw SparrowException::create(false, SPW_API_FAILED, "Index %u must reference columns in the table.", i); + } + } + } + + // Check duplicate indexes. + for ( uint32_t i=0; i fcolumns; + for ( uint32_t i=0; i columns.length() ) { + throw SparrowException::create( false, SPW_API_FAILED, "Foreign key %u must reference a column defined in the table.", i ); + } + for ( uint32_t j=0; j= columns.length() ) { + throw SparrowException::create( false, SPW_API_FAILED, "Column \"%s\" references a column out of range: %d.", column.getName(), ip ); + } + if ( columns[ip].getType() != COL_BLOB || !columns[ip].isFlagSet(COL_IP_ADDRESS) ) { + throw SparrowException::create( false, SPW_API_FAILED, "Column \"%s\" contains reverse DNS lookups of values " + "from column \"%s\". This column does not contain IP addresses.", + column.getName(), columns[ip].getName() ); + } + if ( !column.isFlagSet(COL_NULLABLE) ) { + throw SparrowException::create( false, SPW_API_FAILED, "Column \"%s\" must be nullable since it contains reverse DNS lookups.", column.getName() ); + } + } + } + + if ( hasReverseDns && dns.entries() > 0 ) { + if ( !hasDnsIdentifier && (dns.entries() > 1 || !dns.contains(-1) ) ) { + throw SparrowException::create( false, SPW_API_FAILED, "At least one column contains reverse DNS lookups and there is no DNS identifier column. 
" + "In this case, the DNS configuration can contain only the wildcard identifier."); + } + } +} + +void spw_Table::getColumnDefinition( Str& str, const spw_Column& column ) { + char buffer[1024]; + snprintf( buffer, sizeof(buffer), "`%s` %s", column.getName(), getType(column.getType()) ); + if ( column.getType() == COL_BLOB || column.getType() == COL_STRING ) { + char b[64]; + snprintf( b, sizeof(b), "(%u)", column.getStringSize() ); + strcat( buffer, b ); + } + if ( column.isFlagSet(COL_UNSIGNED) ) { + strcat( buffer, " UNSIGNED" ); + } + if ( column.isFlagSet(COL_NULLABLE) ) { + strcat( buffer, " NULL" ); + } else { + strcat( buffer, " NOT NULL" ); + } + const char* defaultValue = column.getDefaultValue(); + if (strlen(defaultValue) > 0) { + strcat(buffer, " DEFAULT '"); + strcat(buffer, defaultValue); + strcat(buffer, "'"); + } + if ( column.isFlagSet(COL_AUTO_INC) ) { + strcat( buffer, " AUTO_INCREMENT" ); + } + str = Str( buffer, false ); +} + +int spw_Table::create( Connection* connection ) +{ + SPW_relASSERT(connection); + if ( connection->isClosed() ) { + spwerror = SparrowException( "Not connected.", true, SPW_API_SOCKET_CONN_CLOSED ); + return SPW_API_SOCKET_CONN_CLOSED; + } + + spw_Connection* conn = static_cast(connection); + + try { + conn->initialize( *this ); + return 0; + } catch ( const SparrowException& e ) { + spwerror = e; + return e.getErrcode(); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// InitMySQLib +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void InitMySQLib::initialize() { + if ( initialized_ ) return; + if ( mysql_library_init(0, nullptr, nullptr) ) { + throw SparrowException::create(false, SPW_API_FAILED, "could not initialize MySQL library"); + } + initialized_ = true; +} + +void InitMySQLib::clear() { + if ( initialized_ ) { + mysql_library_end(); + } + initialized_ = false; +} + + +} diff --git a/storage/sparrow/api/spw_table.h b/storage/sparrow/api/spw_table.h new file mode 100644 index 000000000000..99f9d87d9003 --- /dev/null +++ b/storage/sparrow/api/spw_table.h @@ -0,0 +1,185 @@ +#ifndef _spw_api_impl_table_h +#define _spw_api_impl_table_h + +#include "include/table.h" +#include "spw_types.h" + + +namespace Sparrow +{ + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// InitMySQLib +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class InitMySQLib +{ +private: + bool initialized_; + void clear(); + +public: + InitMySQLib() : initialized_(false) {} + ~InitMySQLib() { + clear(); + } + + void initialize(); +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Table +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_Connection; +class spw_Table : public Table +{ +private: + // Name of the database + Str databaseName_; + + // Name of the table + Str tableName_; + + // Columns this table is made of + Columns columns_; + + // Indexes defined on this table + Indexes indexes_; + + // Foreign key defined on this table + ForeignKeys foreignKeys_; + + // DNS configuration for this table + DnsConfiguration dns_; + + // Maximum lifetime of data in this table, in milliseconds. + uint64_t maxLifetime_; + + // Coalescing period, in milliseconds. 
+ uint64_t coalescingPeriod_; + + // Aggregation period, in seconds, of data in this table. If 0, there is no aggregation. + uint32_t aggregationPeriod_; + + // Default where period, in milliseconds. + uint64_t defaultWhere_; + + // String optimization size, in bytes. + uint64_t stringOptimization_; + + static InitMySQLib initMySQL_; + +protected: + void getColumnDefinition(Str& str, const spw_Column& column); + +public: + spw_Table(); + spw_Table(const Str& database, const Str& table); + spw_Table(const Str& database, const Str& table, + const Columns& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, + const DnsConfiguration& dns, uint64_t maxLifetime, uint64_t coalescingPeriod, + uint32_t aggregationPeriod, uint64_t defaultWhere, uint64_t stringOptimization); + + ~spw_Table() {} + + static void initialize() _THROW_(SparrowException) { + initMySQL_.initialize(); + } + + int create(Connection* connection) override; + + void setDatabaseName( const char* name ) override { + databaseName_ = name; + } + const char* getDatabaseName() const override { + return databaseName_.c_str(); } + + const Str& getDbNameStr() const { + return databaseName_; } + + void setTableName( const char* name ) override { + tableName_ = name; + } + const char* getTableName() const override { return tableName_.c_str(); } + + const Str& getTableNameStr() const { return tableName_; } + + void setMaxLifetime( uint64_t maxLifetime ) override { + maxLifetime_ = maxLifetime; + } + uint64_t getMaxLifetime() const override { return maxLifetime_; } + + void setCoalescPeriod( uint64_t coalescingPeriod ) override { + coalescingPeriod_ = coalescingPeriod; + } + uint64_t getCoalescPeriod() const override { return coalescingPeriod_; } + + void setAggregPeriod( uint32_t aggregationPeriod ) override { + aggregationPeriod_ = aggregationPeriod; + } + uint32_t getAggregPeriod() const override { return aggregationPeriod_; } + + void setDefaultWhere(uint64_t defaultWhere) override { + defaultWhere_ = defaultWhere; + } + uint64_t getDefaultWhere() const override { return defaultWhere_; } + + void setStringOptimization(uint64_t stringOptimization) override { + stringOptimization_ = stringOptimization; + } + uint64_t getStringOptimization() const override { return stringOptimization_; } + + int appendColumn(const char* name, uint32_t index, ColumnType type, uint32_t stringSize=0, + uint32_t flags=0, uint32_t info=0, const char* charset=DEF_CHARSET) override { + return columns_.append( spw_Column( name, index, type, stringSize, flags, info, charset ) ); + } + uint32_t getNbColumns() const override { return columns_.length(); } + const Column& getColumn(uint32_t index) const override { + SPW_ASSERT(index < columns_.length()); + return columns_[index]; + } + + Column& getColumn(uint32_t index) override { + SPW_ASSERT(index < columns_.length()); + return columns_[index]; + } + + const Columns& getColumns() const { return columns_; } + void setColumns(const Columns& columns) { columns_ = columns; } + + + int appendIndex( const char* name, uint32_t colIndex, bool unique ) override { + return indexes_.append( spw_Index( name, colIndex, unique ) ); + } + int addColToIndex( uint32_t indexId, uint32_t colIndex ) override { + SPW_ASSERT( indexId < indexes_.length() ); + return indexes_[indexId].getColumnIds().append( colIndex ); + } + const Indexes& getIndexes() const { return indexes_; } + void setIndexes(const Indexes& indexes) { indexes_ = indexes; } + + + virtual int appendFK( const char* name, uint32_t colIndex, const char* 
databaseName, + const char* tableName, const char* columnName) override { + return foreignKeys_.append( spw_ForeignKey( name, colIndex, databaseName, tableName, columnName ) ); + } + const ForeignKeys& getForeignKeys() const { return foreignKeys_; } + void setForeignKeys(const ForeignKeys& fks) { foreignKeys_ = fks; } + + + int addDnsEntry(uint32_t dnsEntry) override { + return dns_.append( DnsConfigId( dnsEntry ) ); + } + int addDnsServer(uint32_t entryIndex, const char* name, uint32_t port, const char* sourcAddr, uint32_t sourcePort) override { + SPW_ASSERT( entryIndex < dns_.length() ); + return dns_[entryIndex].getServers().append( spw_DnsServer( name, port, sourcAddr, sourcePort ) ); + } + const DnsConfiguration& getDns() const { return dns_; } + void setDns(const DnsConfiguration& dns) { dns_ = dns; } +}; + +} // namespace Sparrow + +#endif // #define _spw_api_impl_table_h diff --git a/storage/sparrow/api/spw_types.cc b/storage/sparrow/api/spw_types.cc new file mode 100644 index 000000000000..7b4477c81433 --- /dev/null +++ b/storage/sparrow/api/spw_types.cc @@ -0,0 +1,24 @@ +#include "memalloc.h" +#include "spw_types.h" + + +namespace Sparrow +{ + +///////////////////////////////////////////////////////////////////////////////////////////////////// +// Str +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const char* Str::empty_ = ""; + + +///////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// TODO: put spwerror in thread local storage, otherwise concurrent calls from different threads overwrite +// each other's error. +SparrowException spwerror; + + +} diff --git a/storage/sparrow/api/spw_types.h b/storage/sparrow/api/spw_types.h new file mode 100644 index 000000000000..18eb90d00b33 --- /dev/null +++ b/storage/sparrow/api/spw_types.h @@ -0,0 +1,834 @@ +#ifndef _spw_api_impl_types_h_ +#define _spw_api_impl_types_h_ + +#include "include/types.h" +#include "interval.h" +#include "lock.h" +#include "misc.h" +#include "vec.h" +#include "hash.h" +#include "serial.h" +#include "include/exception.h" +#include "str.h" +//#include "treeorder.h" + +namespace Sparrow { + +// last global error +extern SparrowException spwerror; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Column implementation class +////////////////////////////////////////////////////////////////////////////////////////////////////// + +//const char* getType( ColumnType type ); + + +class spw_Table; + +// Describes a table column. +class spw_Column : public Column +{ + friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_Column& column); + friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_Column& column); + friend class spw_Table; + +private: + + // Column name + Str name_; + + // Column index, starting at 0. + uint32_t index_; + + // Character set + Str charset_; + + // Column type + ColumnType type_; + + // Column flags. The integer value is computed by ORing 2 power Flag ordinal values. See ColumnFlags. + uint32_t flags_; + + // Additional column info. The meaning of this attribute depends on the column flags. + uint32_t info_; + + // String size, used with variable-length types. + uint32_t stringSize_; + + // Alteration serial number of creation. + uint32_t serial_; + + // Alteration serial number of drop. 
+ uint32_t dropSerial_; + + // Default value. + Str defaultValue_; + +public: + + spw_Column() : index_(0), type_(COL_UNKNOWN), flags_(0), info_(0), stringSize_(0), serial_(0), dropSerial_(0) { + } + + spw_Column(const char* name) : name_(name), index_(0), type_(COL_UNKNOWN), flags_(0), info_(0), stringSize_(0), serial_(0), dropSerial_(0) { + } + + spw_Column(const char* name, uint32_t index, const ColumnType type, const uint32_t stringSize=0, const uint32_t flags=0, const uint32_t info=0, const char* charset=DEF_CHARSET, const char* defaultValue="") + : name_(name), index_(index), charset_(charset), type_(type), flags_(flags), info_(info), stringSize_(stringSize), serial_(0), dropSerial_(0), defaultValue_(defaultValue) { + } + + ~spw_Column() { + } + + const char* getName() const override { + return name_.c_str(); + } + + ColumnType getType() const override { + return type_; + } + bool isString() const override { + return type_ == COL_BLOB || type_ == COL_STRING; + } + uint32_t getStringSize() const override { + return stringSize_; + } + + uint32_t getIndex() const override { + return index_; + } + + uint32_t getFlags() const override { + return flags_; + } + + bool isFlagSet(const ColumnFlags flag) const override { + return (flags_ & flag) != 0; + } + + void addFlag(const ColumnFlags flag) override { + flags_ |= flag; + } + + void removeFlag(const ColumnFlags flag) override { + flags_ &= ~flag; + } + + uint32_t getInfo() const override { + return info_; + } + + void setInfo(const uint32_t info) override { + info_ = info; + } + + uint32_t getSerial() const override { + return serial_; + } + + uint32_t getDropSerial() const override { + return dropSerial_; + } + + bool isDropped() const override { + return getDropSerial() != 0; + } + + const char* getDefaultValue() const override { + return defaultValue_.c_str(); + } + + /*uint32_t getDataSize() const { + switch (getType()) { + case COL_BLOB: return 16; + case COL_BYTE: return 1; + case COL_SHORT: return 2; + case COL_DOUBLE: return 8; + case COL_INT: return 4; + case COL_LONG: return 8; + case COL_STRING: return 16; + case COL_TIMESTAMP: return 8; + default: SPW_ASSERT(0); return 0; + } + } + + uint32_t getBits() const { + if (isString()) { + return isFlagSet(COL_NULLABLE) ? 6 : 5; + } else { + return isFlagSet(COL_NULLABLE) ? 1 : 0; + } + }*/ + + const char* getCharset() const override { + return charset_.c_str(); + } + + bool operator == (const spw_Column& right) const { + // Size does not matter. Name can be empty when upgrading from older versions. 
+ return type_ == right.type_ && (flags_ & COL_NULLABLE) == (right.flags_ & COL_NULLABLE) + && (name_.length() == 0 || right.name_.length() == 0 || name_ == right.name_); + } +}; + +typedef SYSvector Columns; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_Column& column) { + buffer >> column.name_; + int type; + buffer >> type; + column.type_ = static_cast(type); + buffer >> column.flags_ >> column.info_ >> column.charset_ >> column.serial_ >> column.dropSerial_ >> column.defaultValue_; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_Column& column) { + buffer << column.name_ << static_cast(column.type_) + << column.flags_ << column.info_ << column.charset_ << column.serial_ << column.dropSerial_ << column.defaultValue_ + << column.stringSize_; + return buffer; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// List of columns names +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_ColumnNames : public ColumnNames { +private: + SYSvector names_; +public: + spw_ColumnNames(int size=0) : names_(size) {;} + ~spw_ColumnNames() {;} + uint32_t size() const override { return names_.entries(); } + uint32_t appendName(const char* name) override { + return names_.append(Str(name)); + } + const char* getName(int index) const override { + return names_[index].c_str(); + } + const SYSvector& getNames() const { return names_; } + +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Index +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Describes a table index. +typedef SYSvector IndexIds; +typedef SYSvector ColumnIds; +typedef SYSvector ColumnIdsArray; +class spw_Index : public Index +{ + friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_Index& index); + friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_Index& index); + friend class spw_Table; + +private: + + // Index name + Str name_; + + // Column Ids this index is made of + ColumnIds columns_; + + // true is the index is UNIQUE + bool unique_; + + // true is this index has been dropped + bool dropped_; + +public: + + spw_Index() : unique_(false), dropped_(false) { + } + + spw_Index(const char* name, uint32_t colIndex, bool unique) + : name_(name), unique_(unique), dropped_(false) { + columns_.append( colIndex ); + } + + spw_Index(const char* name, const ColumnIds& columns, bool unique) + : name_(name), columns_(columns), unique_(unique), dropped_(false) { + } + + spw_Index& operator = (const spw_Index& right) { + if (this == &right) { + return *this; + } + name_ = right.name_; + columns_ = right.columns_; + unique_ = right.unique_; + dropped_ = right.dropped_; + return *this; + } + + spw_Index(const spw_Index& right) { + *this = right; + } + + bool operator == (const spw_Index& right) const { + return dropped_ == right.dropped_ && unique_ == right.unique_ && columns_ == right.columns_; + } + + const char* getName() const override { + return name_.c_str(); + } + + void setName(const Str& name) { + name_ = name; + } + + uint32_t getColumnIds(uint32_t* ids, uint32_t len) const override { + if ( columns_.length() > len ) return -1; + for ( uint32_t i=0; i Indexes; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_Index& index) { + buffer >> index.name_ >> index.columns_ >> index.unique_ >> index.dropped_; + return buffer; +} + 
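For review, a minimal round-trip sketch of the spw_Index serialization pair (the operator >> above and the operator << just below). It is illustrative only: the wrapper function name is hypothetical, and it assumes that ByteBuffer can be constructed over caller-provided storage and read back what was written, as the Str operators later in this patch suggest, with the declarations of this header in scope.

// Illustrative sketch; indexRoundTripSketch and the ByteBuffer(ptr, length) form are assumptions.
static void indexRoundTripSketch() {
  ColumnIds cols;
  cols.append(0);                        // e.g. a timestamp column
  cols.append(1);                        // e.g. a slotId column
  const spw_Index original("ix_slot_ts", cols, /* unique */ false);

  uint8_t raw[256];
  ByteBuffer out(raw, sizeof(raw));      // assumed: wraps raw storage for writing
  out << original;                       // serialize with the operator defined below

  ByteBuffer in(raw, sizeof(raw));       // assumed: a fresh wrapper reads from the start
  spw_Index restored;
  in >> restored;                        // deserialize with the operator defined above
  SPW_ASSERT(restored == original);      // spw_Index equality deliberately ignores the name
}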
+inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_Index& index) {
+  buffer << index.name_ << index.columns_ << index.unique_ << index.dropped_;
+  return buffer;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// Alteration implementation class
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+class spw_Alteration : public Alteration
+{
+  friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_Alteration& alteration);
+  friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_Alteration& alteration);
+
+private:
+
+  // Alteration type.
+  AlterationType type_;
+
+  // Alteration serial.
+  uint32_t serial_;
+
+  // Alteration id.
+  uint32_t id_;
+
+public:
+
+  spw_Alteration() : type_(ALT_UNKNOWN), serial_(0), id_(0) {
+  }
+
+  spw_Alteration(const AlterationType type, const uint32_t serial, const uint32_t id)
+    : type_(type), serial_(serial), id_(id) {
+  }
+
+  spw_Alteration& operator = (const spw_Alteration& right) {
+    if (this == &right) {
+      return *this;
+    }
+    type_ = right.type_;
+    serial_ = right.serial_;
+    id_ = right.id_;
+    return *this;
+  }
+
+  spw_Alteration(const spw_Alteration& right) {
+    *this = right;
+  }
+
+  AlterationType getType() const override {
+    return type_;
+  }
+
+  uint32_t getSerial() const override {
+    return serial_;
+  }
+
+  uint32_t getId() const override {
+    return id_;
+  }
+};
+
+typedef SYSvector Alterations;
+
+inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_Alteration& alteration) {
+  int type;
+  buffer >> type;
+  alteration.type_ = static_cast<AlterationType>(type);
+  buffer >> alteration.serial_ >> alteration.id_;
+  return buffer;
+}
+
+inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_Alteration& alteration) {
+  buffer << static_cast<int>(alteration.type_) << alteration.serial_ << alteration.id_;
+  return buffer;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// ForeignKey
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Describes a table foreign key.
+class spw_ForeignKey : public ForeignKey
+{
+  friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_ForeignKey& foreignKey);
+  friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_ForeignKey& foreignKey);
+  friend class spw_Table;
+
+private:
+
+  // Foreign key name.
+  Str name_;
+
+  // Index of the column the foreign key is defined on.
+ uint32_t columnId_; + + // Referenced database + Str databaseName_; + + // Referenced table + Str tableName_; + + // Referenced column + Str columnName_; + +public: + + spw_ForeignKey() : columnId_(0) { + } + + spw_ForeignKey(const char* name, uint32_t columnId, const char* databaseName, const char* tableName, + const char* columnName) + : name_(name), columnId_(columnId), databaseName_(databaseName), + tableName_(tableName), columnName_(columnName) { + } + + spw_ForeignKey& operator = (const spw_ForeignKey& right) { + if (this == &right) { + return *this; + } + name_ = right.name_; + columnId_ = right.columnId_; + databaseName_ = right.databaseName_; + tableName_ = right.tableName_; + columnName_ = right.columnName_; + return *this; + } + + spw_ForeignKey(const ForeignKey& right) { + *this = right; + } + + const char* getName() const override { + return name_.c_str(); + } + + uint32_t getColumnId() const override { + return columnId_; + } + + const char* getDatabaseName() const override { + return databaseName_.c_str(); + } + + const char* getTableName() const override { + return tableName_.c_str(); + } + + const char* getColumnName() const override { + return columnName_.c_str(); + } + + bool operator == (const spw_ForeignKey& right) const { + return name_ == right.name_ && columnId_ == right.columnId_ && databaseName_ == right.databaseName_ + && tableName_ == right.tableName_ && columnName_ == right.columnName_; + } +}; + +typedef SYSvector ForeignKeys; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_ForeignKey& foreignKey) { + buffer >> foreignKey.name_ >> foreignKey.columnId_ >> foreignKey.databaseName_ + >> foreignKey.tableName_>> foreignKey.columnName_; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_ForeignKey& foreignKey) { + buffer << foreignKey.name_ << foreignKey.columnId_ << foreignKey.databaseName_ + << foreignKey.tableName_ << foreignKey.columnName_; + return buffer; +} + +// Time period: milliseconds since epoch (1970). 
+typedef Interval TimePeriod; + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsServer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_DnsServer : public DnsServer +{ + friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_DnsServer& dns); + friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_DnsServer& dns); + friend class spw_Table; + +private: + + Str host_; + uint32_t port_; + Str sourceAddress_; + uint32_t sourcePort_; + +public: + + spw_DnsServer() : port_(0), sourcePort_(0) { + } + + spw_DnsServer(const char* host, const uint32_t port, const char* sourceAddress, const uint32_t sourcePort) + : host_(host), port_(port), sourceAddress_(sourceAddress), sourcePort_(sourcePort) { + } + + spw_DnsServer& operator = (const spw_DnsServer& right) { + host_ = right.host_; + port_ = right.port_; + sourceAddress_ = right.sourceAddress_; + sourcePort_ = right.sourcePort_; + return *this; + } + + bool operator == (const spw_DnsServer& right) const { + return host_ == right.host_ && port_ == right.port_ + && sourceAddress_ == right.sourceAddress_ && sourcePort_ == right.sourcePort_; + } + + bool operator < (const spw_DnsServer& right) const { + int cmp = host_.compareTo(right.host_, false); + if (cmp < 0) { + return true; + } else if (cmp > 0) { + return false; + } + if (port_ < right.port_) { + return true; + } else if (port_ > right.port_) { + return false; + } + cmp = sourceAddress_.compareTo(right.sourceAddress_, false); + if (cmp < 0) { + return true; + } else if (cmp > 0) { + return false; + } + if (sourcePort_ < right.sourcePort_) { + return true; + } else if (sourcePort_ > right.sourcePort_) { + return false; + } + return false; + } + + spw_DnsServer(const spw_DnsServer& right) { + *this = right; + } + + ~spw_DnsServer() { + } + + const char* getHost() const override { + return host_.c_str(); + } + + uint32_t getPort() const override { + return port_; + } + + const char* getSourceAddr() const override { + return sourceAddress_.c_str(); + } + + uint32_t getSourcePort() const override { + return sourcePort_; + } + + Str print() const { + char buffer[1024]; + snprintf(buffer, sizeof(buffer), "host=%s, port=%u, sourceAddress=%s, sourcePort=%u", getHost(), getPort(), getSourceAddr(), getSourcePort()); + return Str(buffer); + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_DnsServer& dns) { + buffer << dns.host_ << dns.port_ << dns.sourceAddress_ << dns.sourcePort_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_DnsServer& dns) { + buffer >> dns.host_ >> dns.port_ >> dns.sourceAddress_ >> dns.sourcePort_; + return buffer; +} + +typedef SYSsortedVector DnsServers; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsConfigId +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsConfigId { + friend ByteBuffer& operator >> (ByteBuffer& buffer, DnsConfigId& id); + friend ByteBuffer& operator << (ByteBuffer& buffer, const DnsConfigId& id); + +private: + + int id_; // DNS identifier. + DnsServers servers_; // DNS servers. 
+ +public: + + DnsConfigId() : id_(-1) { + } + + DnsConfigId(int id) : id_(id) { + } + + DnsConfigId(int id, const DnsServers& servers) : id_(id), servers_(servers) { + } + + int getId() const { + return id_; + } + + bool operator == (const DnsConfigId& right) const { + return id_ == right.id_; + } + + const DnsServers& getServers() const { + return servers_; + } + + DnsServers& getServers() { + return servers_; + } + + uint32_t hash() const { + return id_; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const DnsConfigId& id) { + buffer << id.id_ << id.servers_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, DnsConfigId& id) { + buffer >> id.id_ >> id.servers_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsConfiguration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsConfiguration : public SYSvector { + +public: + DnsConfiguration() : SYSvector(1) + {} +}; + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Partition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_Partition : public Partition +{ +protected: + + uint64_t serial_; + uint64_t dataSerial_; + uint32_t filesystem_; + uint32_t indexAlterSerial_; // Index alteration serial number; to be compared to Master::indexAlterSerial_. + uint32_t columnAlterSerial_; // Column alteration serial number; to be compared to Column::serial_. + +public: + + spw_Partition(const uint64_t serial, const uint64_t dataSerial, const uint32_t filesystem, const uint32_t indexAlterSerial, + const uint32_t columnAlterSerial) + : serial_(serial), dataSerial_(dataSerial), filesystem_(filesystem),indexAlterSerial_(indexAlterSerial), + columnAlterSerial_(columnAlterSerial) { + } + + virtual ~spw_Partition() { + } + + // Attributes. + uint64_t getSerial() const override { + return serial_; + } + + uint64_t getDataSerial() const { + return dataSerial_; + } + + bool isMain() const { + return getSerial() == getDataSerial(); + } + + uint32_t getFilesystem() const override { + return filesystem_; + } + + uint32_t getIndexAlterSerial() const override { + return indexAlterSerial_; + } + + uint32_t getColumnAlterSerial() const { + return columnAlterSerial_; + } + + // Comparison. + bool operator == (const spw_Partition& right) const { + return serial_ == right.serial_; + } + + bool operator < (const spw_Partition& right) const { + return serial_ < right.serial_; + } + + // Hash. + uint32_t hash() const { + return 31 + static_cast (serial_ ^ (serial_ >> 32)); + } +}; + +typedef SYSpSortedVector Partitions; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PersistentPartition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class spw_PersistentPartition : public spw_Partition { + friend ByteBuffer& operator >> (ByteBuffer& buffer, spw_PersistentPartition& partition); + friend ByteBuffer& operator << (ByteBuffer& buffer, const spw_PersistentPartition& partition); + +private: + + uint32_t version_; // See PersistentPartition::currentVersion_. + TimePeriod period_; + uint64_t fileTime_; + uint32_t records_; + uint64_t dataSize_; + uint64_t indexSize_; + uint64_t dataRecords_; // Number of records in data file. 
+ uint64_t recordOffset_; // Record offset in main partition. + ColumnIds skippedColumnIds_; // Skipped columns. Columns for which all values are NULL are not stored. + +public: + + spw_PersistentPartition(uint32_t version, const uint64_t serial, const uint64_t dataSerial, const uint32_t filesystem, const uint32_t indexAlterSerial, + const uint32_t columnAlterSerial, const TimePeriod& period, const uint32_t records, const uint64_t dataSize, const uint64_t indexSize, + const uint64_t dataRecords, const uint64_t recordOffset) + : spw_Partition(serial, dataSerial, filesystem, indexAlterSerial, columnAlterSerial), version_(version), + period_(period), fileTime_(period.getMin()), records_(records), dataSize_(dataSize), indexSize_(indexSize), dataRecords_(dataRecords), recordOffset_(recordOffset) { + } + + // Deserialization constructor. + spw_PersistentPartition() : spw_Partition(0, 0, 0, 0, 0) { + } + + ~spw_PersistentPartition() { + } + + uint32_t getVersion() const { + return version_; + } + + TimePeriod getPeriod() const { + return period_; + } + + uint64_t getFileTime() const { + return fileTime_; + } + + uint32_t getRecords() const { + return records_; + } + + uint64_t getDataSize() const { + return dataSize_; + } + + uint64_t getIndexSize() const { + return indexSize_; + } + + uint64_t getDataRecords() const { + return dataRecords_; + } + + uint64_t getRecordOffset() const { + return recordOffset_; + } + + const ColumnIds& getSkippedColumns() { + return skippedColumnIds_; + } +}; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, spw_PersistentPartition& partition) { + buffer >> partition.version_ >> partition.serial_ >> partition.dataSerial_ >> partition.dataRecords_ + >> partition.recordOffset_ >> partition.period_ >> partition.fileTime_ >> partition.records_ >> partition.dataSize_ + >> partition.indexSize_ >> partition.filesystem_ >> partition.indexAlterSerial_ + >> partition.columnAlterSerial_ >> partition.skippedColumnIds_; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const spw_PersistentPartition& partition) { + buffer << partition.version_ << partition.serial_ << partition.dataSerial_ << partition.dataRecords_ + << partition.recordOffset_ << partition.period_ << partition.fileTime_ << partition.records_ << partition.dataSize_ + << partition.indexSize_ << partition.filesystem_ << partition.indexAlterSerial_ + << partition.columnAlterSerial_ << partition.skippedColumnIds_; + return buffer; +} + +} + +#endif /* #ifndef _spw_api_impl_types_h_ */ diff --git a/storage/sparrow/api/str.h b/storage/sparrow/api/str.h new file mode 100644 index 000000000000..3d83d1f8fa61 --- /dev/null +++ b/storage/sparrow/api/str.h @@ -0,0 +1,309 @@ +#ifndef _spw_api_str_h_ +#define _spw_api_str_h_ + +#include "memalloc.h" +#include "m_string.h" + +#include "serial.h" +#include + + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Str +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Simple string class, not optimized for performance. 
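Before the class definition, a short usage sketch of Str may help reviewers; it is illustrative only (the wrapper function name is hypothetical) and relies on the members declared below plus the free operator + defined at the end of this header.

// Illustrative sketch; strUsageSketch is a hypothetical wrapper, everything else is the Str API below.
static void strUsageSketch() {
  Str table("Events");                          // owning copy of the literal
  Str column(".timestamp", /* owned */ false);  // non-owning view of the literal
  Str qualified = table + column;               // "Events.timestamp"

  SPW_ASSERT(qualified.length() == 16);
  SPW_ASSERT(qualified.compareTo(Str("events.TIMESTAMP"), /* caseInsensitive */ true) == 0);

  // Human-readable formatting helpers (input in milliseconds / bytes).
  SPW_ASSERT(Str::fromDuration(90 * 1000) == Str("1m"));         // seconds are dropped in the minutes bucket
  SPW_ASSERT(Str::fromSize(3 * 1024 * 1024) == Str("3 MB"));
}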
+class Str { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Str& s); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Str& s); + +private: + + static const char* empty_; + const char* s_; + uint32_t owned_:1; + uint32_t length_:31; + + void empty() { + s_ = empty_; + owned_ = false; + length_ = 0; + } + + void clear() { + if (owned_) { + my_free(const_cast(s_)); + } + s_ = NULL; + } + +public: + + Str() { + empty(); + } + + explicit Str(const char* s, bool owned = true) { + if ( s == NULL || s[0] == '\0' ) { + empty(); + } else { + if (owned) { + s_ = static_cast(my_strdup(s, MYF(MY_FAE))); + owned_ = true; + } else { + s_ = s; + owned_ = false; + } + length_ = static_cast(strlen(s_)); + } + } + + explicit Str(const char* s, int length) { + if ( s == NULL || length == 0 ) { + empty(); + } else { + s_ = static_cast(my_strndup(s, length, MYF(MY_FAE))); + owned_ = true; + length_ = static_cast(strlen(s_)); + } + } + + Str(const Str& s) { + if ( s.length_ == 0 ) { + empty(); + } else { + s_ = static_cast(my_strdup(s.s_, MYF(MY_FAE))); + owned_ = true; + length_ = static_cast(strlen(s_)); + } + } + + Str& operator = (const Str& s) { + if (this == &s) { + return *this; + } + clear(); + if ( s.length_ == 0 ) { + empty(); + } else { + s_ = static_cast(my_strdup(s.s_, MYF(MY_FAE))); + owned_ = true; + length_ = static_cast(strlen(s_)); + } + return *this; + } + + Str& operator = (const char* s) { + clear(); + if ( s == NULL || s[0] == '\0' ) { + empty(); + } else { + s_ = static_cast(my_strdup(s, MYF(MY_FAE))); + owned_ = true; + length_ = static_cast(strlen(s_)); + } + return *this; + } + + ~Str() { + clear(); + } + + int length() const { + return static_cast(length_); + } + + const char* c_str() const { + return s_; + } + + bool isOwned() const { + return owned_; + } + + int compareTo(const Str& s, const bool caseInsensitive) const { + if (caseInsensitive) { + return native_strcasecmp(s_, s.s_); + } else { + return strcmp(s_, s.s_); + } + } + + bool startsWith(const Str& s, const bool caseInsensitive) const { + if (caseInsensitive) { + return native_strncasecmp(s_, s.s_, s.length()) == 0; + } else { + return strncmp(s_, s.s_, s.length()) == 0; + } + } + + bool operator == (const Str& s) const { + return length() == s.length() && compareTo(s, false) == 0; + } + + bool operator != (const Str& s) const { + return !(*this == s); + } + + bool operator < (const Str& s) const { + return compareTo(s, false) < 0; + } + + void toLower() { + if (owned_) { + for (int i = 0; i < length(); ++i) { + const_cast(s_)[i] = tolower(s_[i]); + } + } else { + Str s(*this); + s.toLower(); + *this = s; + } + } + + Str& operator += (const Str& s) { + if (s.length() == 0) { + return *this; + } else if (length() == 0) { + *this = s; + return *this; + } + int l = length(); + int sl = s.length(); + char* ns = static_cast(my_malloc(l + sl + 1, MYF(MY_FAE))); + memcpy(ns, s_, l); + memcpy(ns + l, s.s_, sl + 1); + clear(); + s_ = ns; + owned_ = true; + length_ = static_cast(strlen(s_)); + return *this; + } + + Str& operator += (const char* s) { + SPW_ASSERT(s != 0); + if (strlen(s) == 0) { + return *this; + } else if (length() == 0) { + *this = s; + return *this; + } + int l = length(); + size_t sl = strlen(s); + char* ns = static_cast(my_malloc(l + sl + 1, MYF(MY_FAE))); + memcpy(ns, s_, l); + memcpy(ns + l, s, sl + 1); + clear(); + s_ = ns; + owned_ = true; + length_ = static_cast(strlen(s_)); + return *this; + } + + uint32_t hash() const { + uint32_t h = 1; + int off = 0; + for (;;) { + // Hash is case 
insensitive. + const uint8_t v = static_cast(tolower(s_[off])); + if (v == 0) { + break; + } + ++off; + h = 31 * h + v; + } + return h; + } + + // Timestamp is in milliseconds. + static Str fromTimestamp(const uint64_t timestamp) { + const uint32_t milliseconds = timestamp % 1000; + time_t tt = static_cast(timestamp / 1000); + struct tm *t; + t = localtime(&tt); + char buffer[32]; + snprintf(buffer, sizeof(buffer), "%04d/%02d/%02d %2d:%02d:%02d.%03u", 1900 + t->tm_year, t->tm_mon + 1, t->tm_mday, + t->tm_hour, t->tm_min, t->tm_sec, milliseconds); + return Str(buffer); + } + + // Duration is in milliseconds. + static Str fromDuration(const uint64_t duration) { + char buffer[128]; + const uint32_t milliseconds = static_cast(duration % 1000); + if (duration < 1000) { + snprintf(buffer, sizeof(buffer), "%ums", milliseconds); + } else if (duration < 60000) { + if (milliseconds == 0) { + snprintf(buffer, sizeof(buffer), "%us", static_cast(duration / 1000)); + } else { + snprintf(buffer, sizeof(buffer), "%us%03ums", static_cast(duration / 1000), milliseconds); + } + } else if (duration < 3600000) { + snprintf(buffer, sizeof(buffer), "%um", static_cast(duration / 60000)); + } else if (duration < 86400000) { + const uint minutes = static_cast((duration % 3600000) / 60000); + if (minutes == 0) { + snprintf(buffer, sizeof(buffer), "%uh", static_cast(duration / 3600000)); + } else { + snprintf(buffer, sizeof(buffer), "%uh%um", static_cast(duration / 3600000), minutes); + } + } else { + const uint hours = static_cast((duration % 86400000) / 3600000); + if (hours == 0) { + snprintf(buffer, sizeof(buffer), "%ud", static_cast(duration / 86400000)); + } else { + snprintf(buffer, sizeof(buffer), "%ud%uh", static_cast(duration / 86400000), hours); + } + } + return Str(buffer); + } + + // Size is in bytes. 
+ static Str fromSize(const uint64_t size) { + char buffer[128]; + if (size < static_cast(1024)) { + snprintf(buffer, sizeof(buffer), "%llu", static_cast(size)); + } else if (size < static_cast(1024) * 1024) { + snprintf(buffer, sizeof(buffer), "%llu KB", static_cast(size / 1024)); + } else if (size < static_cast(1024) * 1024 * 1024) { + snprintf(buffer, sizeof(buffer), "%llu MB", static_cast(size / 1024 / 1024)); + } else if (size < static_cast(1024) * 1024 * 1024 * 1024) { + snprintf(buffer, sizeof(buffer), "%.1f GB", static_cast(size) / 1024 / 1024 / 1024); + } else { + snprintf(buffer, sizeof(buffer), "%.1f TB", static_cast(size) / 1024 / 1024 / 1024 / 1024); + } + return Str(buffer); + } +}; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, Str& s) { + int length; + buffer >> length; + s.clear(); + s.s_ = static_cast(my_malloc(length + 1, MYF(MY_FAE))); + s.owned_ = true; + ByteBuffer contents(reinterpret_cast(s.s_), length); + buffer >> contents; + const_cast(s.s_)[length] = 0; + s.length_ = length; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const Str& s) { + int length = s.length(); + buffer << length << ByteBuffer(reinterpret_cast(s.c_str()), length); + return buffer; +} + +inline static Str operator + (const Str& left, const Str& right) { + Str s = left; + s += right; + return s; +} + +} + +#endif /* #ifndef _spw_api_str_h_ */ diff --git a/storage/sparrow/api/thread.cc b/storage/sparrow/api/thread.cc new file mode 100644 index 000000000000..f9bbf43bf210 --- /dev/null +++ b/storage/sparrow/api/thread.cc @@ -0,0 +1,14 @@ +#include "memalloc.h" +#include "thread.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Thread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Lock Thread::lock_(true, "Thread::lock_"); + + +} + diff --git a/storage/sparrow/api/thread.h b/storage/sparrow/api/thread.h new file mode 100644 index 000000000000..d4d985a817c2 --- /dev/null +++ b/storage/sparrow/api/thread.h @@ -0,0 +1,117 @@ +#ifndef _spw_api_thread_h_ +#define _spw_api_thread_h_ + +#include "cond.h" +#include "mysql/psi/mysql_thread.h" +//#include "include/my_pthread.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Thread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Thread { +public: + + // Lock used by all start/stop condition variables. 
+ static Lock lock_; + +private: + + char* m_name_{nullptr}; + my_thread_handle thread_; + volatile bool running_; + volatile bool stop_; // To signal a stop command to the thread + Cond startCond_; + Cond stopCond_; + +protected: + my_thread_t threadId_; + +public: + + // TODO: understand why the version with (Str() + Str()).c_str() does not compile + //Thread(const char* name) : running_(false), stop_(false), startCond_(false, lock_, (Str(name) + Str("::startCond_")).c_str()), + // stopCond_(false, lock_, (Str(name) + Str("::stopCond_")).c_str()) { + Thread(const char* name) : running_(false), stop_(false), startCond_(false, lock_, "::startCond_"), + stopCond_(false, lock_, "::stopCond_"), threadId_(0) { + if (name != nullptr) { + m_name_ = my_strdup(name, MYF(MY_FAE)); + } + } + + virtual ~Thread() { + if (m_name_ != nullptr) { + my_free(const_cast(m_name_)); + } + } + + bool start() { + my_thread_attr_t attr; + my_thread_attr_init(&attr); + my_thread_attr_setdetachstate(&attr, MY_THREAD_CREATE_DETACHED); + my_thread_attr_setstacksize(&attr, 262144); + Guard guard(lock_); + if (my_thread_create(&thread_, &attr, reinterpret_cast(handler), static_cast(this)) != 0 ) { + return false; + } + startCond_.wait(true); + return true; + } + + void stop() { + if ( running_ ) { + Guard guard(lock_); + stop_ = true; + notifyStop(); + stopCond_.wait(2000, true); + } + } + + void join() { + my_thread_join(&thread_, nullptr); + } + + bool isRunning() const { return running_; } + +protected: + + void stopping() { running_ = false; } + + virtual bool process() = 0; + + virtual void notifyStop() = 0; + + virtual bool deleteAfterExit() = 0; + +private: + + static void* handler(void *p) { + Thread* thread = (Thread*)p; + thread->running_ = true; + thread->threadId_ = my_thread_self(); + thread->startCond_.signal(); + while (!thread->stop_) { + if (!thread->process()) { + break; + } + } + thread->running_ = false; + + PRINT_DBUG("Thread stopped!"); + thread->stopCond_.signal(); + /*if (thread->stop_) { + thread->stopCond_.signal(); + } else {*/ + if (thread->deleteAfterExit()) { + delete thread; + } + //} + return 0; + } +}; + +} + +#endif /* #ifndef _spw_api_thread_h_ */ diff --git a/storage/sparrow/api/vec.h b/storage/sparrow/api/vec.h new file mode 100644 index 000000000000..367cc7f0519c --- /dev/null +++ b/storage/sparrow/api/vec.h @@ -0,0 +1,1411 @@ +/* + Vector types. + */ + +#ifndef _spw_api_vec_h_ +#define _spw_api_vec_h_ + +#include "list.h" + +namespace Sparrow { + +// Constant for "not found". 
+#ifndef SYS_NPOS +#define SYS_NPOS (~(static_cast(0))) +#endif + +// +// Default allocator for vectors +// +template class SYSallocator { +private: + + uint32_t capacity_; + +public: + + SYSallocator() : capacity_(0) { + } + uint32_t getCount() const; + void resetCount(); + T* build(uint32_t n); + void destroy(T* p); +}; + +template inline uint32_t SYSallocator::getCount() const { + return capacity_; +} + +template inline void SYSallocator::resetCount() { + capacity_ = 0; +} + +template inline T* SYSallocator::build(uint32_t n) { + capacity_ = n; + return new T[n]; +} + +template inline void SYSallocator::destroy(T* p) { + delete[] p; +} + +// +// SYSarray: simple array +// +template > class SYSarray: public A { +public: + + SYSarray(const uint32_t size = 0); + SYSarray(const uint32_t size, const T& init); + SYSarray(const SYSarray& right); + ~SYSarray(); + + // accessors + uint32_t length() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T* data() const { + return array_; + } + T* data() { + return array_; + } + + // operations + void clear(); + void reshape(const uint32_t n, const bool doCopy = true); + + // operators + SYSarray& operator =(const SYSarray& right); + bool operator ==(const SYSarray& right) const; + +protected: + + void copy(T* destination, const T* source, uint32_t n); + +protected: + + T* array_; +}; + +template inline void SYSarray::copy(T* destination, const T* source, uint32_t n) { + SPW_ASSERT(source != destination && n > 0); + + // in case arrays overlap + if (destination < source) { + while (n-- > 0) { + *destination++ = *source++; + } + } else { + destination += n; + source += n; + while (n-- > 0) { + *--destination = *--source; + } + } +} + +template inline uint32_t SYSarray::length() const { + return (array_ == 0 ? 0 : this->getCount()); +} + +template inline void SYSarray::reshape(const uint32_t n, const bool doCopy /* = true */) { + const uint32_t l = length(); + if (n == 0) { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + } + array_ = 0; + } else if (n != l) { + T* newArray = this->build(n); + if (array_ != 0) { + if (doCopy && l > 0) { + copy(newArray, array_, l > n ? 
n : l); + } + this->destroy(array_); + } + array_ = newArray; + } +} + +// constructors +template inline SYSarray::SYSarray(const uint32_t size /* = 0 */) { + array_ = 0; + reshape(size, false); +} + +template inline SYSarray::SYSarray(const uint32_t size, const T& init) { + array_ = 0; + reshape(size, false); + for (uint32_t i = 0; i < size; ++i) { + array_[i] = init; + } +} + +template inline const T& SYSarray::operator [](const uint32_t index) const { + SPW_ASSERT(index < length()); + return array_[index]; +} + +template inline T& SYSarray::operator [](const uint32_t index) { + SPW_ASSERT(index < length()); + return array_[index]; +} + +template inline void SYSarray::clear() { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + } +} + +template inline SYSarray::~SYSarray() { + clear(); +} + +template inline SYSarray& SYSarray::operator =(const SYSarray& right) { + if (this == &right) { + return *this; + } + const uint32_t l = right.length(); + reshape(l, false); + if (l > 0) { + copy(array_, right.array_, l); + } + return *this; +} + +template inline bool SYSarray::operator ==(const SYSarray& right) const { + if (length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +// copy constructor +template inline SYSarray::SYSarray(const SYSarray& right) { + array_ = 0; + *this = right; +} + +// +// SYSvector: simple vector +// note: SYSvector inherits from the allocator to perform empty base optimization (EBO) +// +template > class SYSvector: public A { +public: + + SYSvector(const uint32_t size = 0); + SYSvector(const SYSvector& right); + ~SYSvector(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + uint32_t length() const; + uint32_t capacity() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T& first() const; + T& first(); + const T& last() const; + T& last(); + uint32_t index(const T& t) const; + bool contains(const T& t) const; + const T* data() const; + + // operations + void insertAt(const uint32_t index, const T& t); + void removeAt(const uint32_t index); + void removeFirst(); + void removeLast(); + bool remove(const T& t); + uint32_t append(const T& t); + void insert(const T& t); + void resize(const uint32_t n, const bool canShrink = false, const bool doCopy = true); + void reshape(const uint32_t n, const bool doCopy = true); + void clear(); + void forceLength(const uint32_t length) { + if (length <= capacity()) { + n_ = length; + } + } + + // operators + SYSvector& operator =(const SYSvector& right); + bool operator ==(const SYSvector& right) const; + +protected: + + void copy(T* destination, const T* source, uint32_t n); + +protected: + + T* array_; + uint32_t n_; +}; + +template inline void SYSvector::copy(T* destination, const T* source, uint32_t n) { + SPW_ASSERT(source != destination); + + // in case arrays overlap + if (destination < source) { + while (n-- > 0) { + *destination++ = *source++; + } + } else { + destination += n; + source += n; + while (n-- > 0) { + *--destination = *--source; + } + } +} + +template inline uint32_t SYSvector::capacity() const { + return (array_ == 0 ? 
0 : this->getCount()); +} + +template inline void SYSvector::resize(const uint32_t n, bool canShrink /* = false */, bool doCopy /* = true */) { + // cannot shrink under the number of elements, unless specified + if (!canShrink && n < n_) + return; + if (n == 0) { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + } + array_ = 0; + } else if (n != capacity()) { + T* newArray = this->build(n); + if (array_ != 0) { + if (doCopy && n_ > 0) { + copy(newArray, array_, n_ > n ? n : n_); + } + this->destroy(array_); + } + array_ = newArray; + } +} + +template inline void SYSvector::reshape(const uint32_t n, const bool doCopy /* = true */) { + // shrinking allowed + resize(n, true, doCopy); + n_ = n; +} + +template inline void SYSvector::insertAt(const uint32_t index, const T& t) { + SPW_ASSERT(index <= n_); + uint32_t size = capacity(); + T* dest = array_; + SPW_ASSERT(size >= n_); + if (size == n_) { + T* newArray = this->build(n_ + (G == 0 ? 1 : G)); + dest = newArray; + if (index > 0) { + copy(dest, array_, index); + } + } + if (n_ > index) { + copy(dest + index + 1, array_ + index, n_ - index); + } + dest[index] = t; + n_++; + if (dest != array_) { + if (array_ != 0) { + this->destroy(array_); + } + array_ = dest; + } +} + +template inline uint32_t SYSvector::append(const T& t) { + uint32_t index = n_; + insertAt(n_, t); + return index; +} + +template inline void SYSvector::insert(const T& t) { + insertAt(n_, t); +} + +template inline void SYSvector::removeAt(const uint32_t index) { + SPW_ASSERT(index < n_); + if (n_ == 1 && G == 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + n_ = 0; + } else { + T* dest = array_; + if (G == 0) { + T* newArray = this->build(n_ - 1); + dest = newArray; + if (index > 0) { + copy(dest, array_, index); + } + } + n_--; + if (n_ > index) { + copy(dest + index, array_ + index + 1, n_ - index); + } + if (dest != array_) { + this->destroy(array_); + array_ = dest; + } else { + array_[n_] = T(); + } + } +} + +template inline void SYSvector::removeFirst() { + removeAt(0); +} + +template inline void SYSvector::removeLast() { + SPW_ASSERT(n_ > 0); + removeAt(n_ - 1); +} + +// constructor +template inline SYSvector::SYSvector(const uint32_t size /* = 0 */) { + array_ = 0; + n_ = 0; + resize(size); +} + +template inline uint32_t SYSvector::entries() const { + return n_; +} + +template inline bool SYSvector::isEmpty() const { + return n_ == 0; +} + +template inline uint32_t SYSvector::length() const { + return n_; +} + +template inline const T& SYSvector::operator [](const uint32_t index) const { + SPW_ASSERT(index < n_); + return array_[index]; +} + +template inline T& SYSvector::operator [](const uint32_t index) { + SPW_ASSERT(index < n_); + return array_[index]; +} + +template inline void SYSvector::clear() { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + n_ = 0; + } +} + +template inline SYSvector::~SYSvector() { + clear(); +} + +template inline SYSvector& SYSvector::operator =(const SYSvector& right) { + if (this == &right) { + return *this; + } + clear(); + resize(right.capacity()); + const uint32_t l = right.length(); + if (l > 0) { + copy(array_, right.array_, l); + } + n_ = l; + return *this; +} + +template inline bool SYSvector::operator ==(const SYSvector& right) const { + if (length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +// copy constructor +template inline 
SYSvector::SYSvector(const SYSvector& right) : A() { + array_ = 0; + n_ = 0; + *this = right; +} + +template inline const T& SYSvector::first() const { + return (*this)[0]; +} + +template inline T& SYSvector::first() { + return (*this)[0]; +} + +template inline const T& SYSvector::last() const { + SPW_ASSERT(n_ > 0); + return (*this)[n_ - 1]; +} + +template inline T& SYSvector::last() { + SPW_ASSERT(n_ > 0); + return (*this)[n_ - 1]; +} + +template inline uint32_t SYSvector::index(const T& t) const { + for (uint32_t i = 0; i < n_; ++i) { + if (array_[i] == t) { + return i; + } + } + return SYS_NPOS; +} + +template inline bool SYSvector::remove(const T& t) { + const uint32_t i = index(t); + const bool found = (i != SYS_NPOS); + if (found) { + removeAt(i); + } + return found; +} + +template inline bool SYSvector::contains(const T& t) const { + return (index(t) != SYS_NPOS); +} + +template inline const T* SYSvector::data() const { + return array_; +} + +// +// SYSpVector: vector of pointers +// +template > class SYSpVector: public SYSvector { +public: + + SYSpVector(const uint32_t size = 0) : SYSvector(size) { + } + + // accessors + T* find(const T* t) const; + uint32_t index(const T* t) const; + bool contains(const T* t) const; + + // operations + T* remove(const T* t, bool destroy); + T* removeAt(const uint32_t index, bool destroy); + T* removeFirst(bool destroy); + T* removeLast(bool destroy); + void clearAndDestroy(); + + // operators + bool operator ==(const SYSpVector& right) const; +}; + +template inline uint32_t SYSpVector::index(const T* t) const { + for (uint32_t i = 0; i < this->length(); ++i) { + T* v = (*this)[i]; + if (*v == *t) { + return i; + } + } + return SYS_NPOS; +} + +template inline T* SYSpVector::find(const T* t) const { + const uint32_t i = index(t); + if (i == SYS_NPOS) { + return 0; + } else { + return (*this)[i]; + } +} + +template inline bool SYSpVector::contains(const T* t) const { + return (index(t) != SYS_NPOS); +} + +template inline T* SYSpVector::remove(const T* t, bool destroy) { + T* result = 0; + for (uint32_t i = 0; i < this->length(); ++i) { + T* v = (*this)[i]; + if (*v == *t) { + result = (*this)[i]; + this->removeAt(i); + break; + } + } + if ( destroy && result != 0 ) { + delete result; + return NULL; + } + return result; +} + +template inline T* SYSpVector::removeAt(const uint32_t index, bool destroy) { + T* result = (*this)[index]; + SYSvector::removeAt(index); + if ( destroy && result != 0 ) { + delete result; + return NULL; + } + return result; +} + +template inline T* SYSpVector::removeLast(bool destroy) { + T* result = this->last(); + SYSvector::removeLast(); + if ( destroy && result != 0 ) { + delete result; + return NULL; + } + return result; +} + +template inline T* SYSpVector::removeFirst(bool destroy) { + T* result = this->first(); + SYSvector::removeFirst(); + if ( destroy && result != 0 ) { + delete result; + return NULL; + } + return result; +} + +template inline void SYSpVector::clearAndDestroy() { + for (uint32_t i = 0; i < this->length(); ++i) { + delete (*this)[i]; + } + this->clear(); +} + +template inline bool SYSpVector::operator ==(const SYSpVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!(*(*this)[i] == *right[i])) { + return false; + } + } + return true; +} + +// +// SYSsortedVector: sorted vector +// +template > class SYSsortedVector: public SYSvector { +public: + + SYSsortedVector(const uint32_t size = 0) : SYSvector(size) { + } + + 
SYSsortedVector& operator = (const SYSvector& right); + SYSsortedVector(const SYSvector& right); + + // accessors + uint32_t index(const T& t) const; + bool contains(const T& t) const; + + // operations + void insert(const T& t); + bool remove(const T& t); + + // operators + bool operator ==(const SYSsortedVector& right) const; + + bool bsearch(const T& t, uint32_t& index, const int mode) const; + +#ifndef NDEBUG + bool isSorted() const; +#endif +}; + +template inline bool SYSsortedVector::bsearch(const T& t, uint32_t& index, const int mode) const { + // mode = 0: check if object exists + // mode = 1: find first occurrence + // mode = 2: find for insertion + bool result = false; + index = 0; + if (this->n_ > 0) { + uint32_t top = this->n_ - 1; + uint32_t bottom = 0; + while (top > bottom) { + index = (top + bottom) >> 1; + const T& v = (*this)[index]; + if (t == v) { + result = true; + break; + } else if (t < v) { + top = index ? index - 1 : 0; + } else { + bottom = index + 1; + } + } + if (!result) { + index = bottom; + if (t == (*this)[index]) { + result = true; + } + } + if (result) { + if (mode == 1) { + // go down to the first one + while (index > 0 && t == (*this)[index - 1]) { + index--; + } + } else if (mode == 2) { + // found; move up to the insertion position + index++; + while (index < this->n_ && t == (*this)[index]) { + index++; + } + } + } else { + if (mode == 2) { + // not found; move up to the insertion position + while (index < this->n_ && (*this)[index] < t) { + index++; + } + } + } + } + return result; +} + +template inline uint32_t SYSsortedVector::index(const T& t) const { + SPW_ASSERT(isSorted()); + uint32_t index; + if (bsearch(t, index, 1)) { + return index; + } else { + return SYS_NPOS; + } +} + +template inline bool SYSsortedVector::contains(const T& t) const { + SPW_ASSERT(isSorted()); + uint32_t index; + return bsearch(t, index, 0); +} + +template inline void SYSsortedVector::insert(const T& t) { + SPW_ASSERT(isSorted()); + uint32_t index; + bsearch(t, index, 2); + insertAt(index, t); + SPW_ASSERT(isSorted()); +} + +template inline SYSsortedVector& SYSsortedVector::operator = (const SYSvector& right) { + SYSvector::clear(); + const uint32_t n = right.entries(); + SYSvector::resize(n, false, false); + for (uint32_t i = 0; i < n; ++i) { + insert(right[i]); + } + return *this; +} + +template inline SYSsortedVector::SYSsortedVector(const SYSvector& right) { + *this = right; +} + +template inline bool SYSsortedVector::remove(const T& t) { + SPW_ASSERT(isSorted()); + uint32_t index; + if (bsearch(t, index, 1)) { + this->removeAt(index); + SPW_ASSERT(isSorted()); + return true; + } else { + return false; + } +} + +template inline bool SYSsortedVector::operator ==(const SYSsortedVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +#ifndef NDEBUG +template inline bool SYSsortedVector::isSorted() const { + if (this->n_ < 2) { + return true; + } + for (uint32_t index = 0; index < this->n_ - 1; index++) { + if (!((*this)[index] < (*this)[index + 1]) && !((*this)[index] == (*this)[index + 1])) { + return false; + } + } + return true; +} +#endif + +// +// SYSpSortedVector: sorted vector of pointers +// +template > class SYSpSortedVector: public SYSvector { +public: + + SYSpSortedVector(const uint32_t size = 0) : SYSvector(size) { + } + + // accessors + uint32_t index(const T* t) const; + bool contains(const T* t) 
const; + T* find(const T* t) const; + + // operations + void insert(T* t); + T* remove(const T* t); + void clearAndDestroy(); + + // operators + bool operator ==(const SYSpSortedVector& right) const; + + bool bsearch(const T& t, uint32_t& index, const int mode) const; + +#ifndef NDEBUG + bool isSorted() const; +#endif +}; + +template inline bool SYSpSortedVector::bsearch(const T& t, uint32_t& index, const int mode) const { + // mode = 0: check if object exists + // mode = 1: find first occurrence + // mode = 2: find for insertion + bool result = false; + index = 0; + if (this->n_ > 0) { + uint32_t top = this->n_ - 1; + uint32_t bottom = 0; + while (top > bottom) { + index = (top + bottom) >> 1; + const T& v = *((*this)[index]); + if (t == v) { + result = true; + break; + } else if (t < v) { + top = index ? index - 1 : 0; + } else { + bottom = index + 1; + } + } + if (!result) { + index = bottom; + if (t == *((*this)[index])) { + result = true; + } + } + if (result) { + if (mode == 1) { + // go down to the first one + while (index > 0 && t == *((*this)[index - 1])) { + index--; + } + } else if (mode == 2) { + // found; move up to the insertion position + index++; + while (index < this->n_ && t == *((*this)[index])) { + index++; + } + } + } else if (mode == 2) { + // not found; move up to the insertion position + while (index < this->n_ && *((*this)[index]) < t) { + index++; + } + } + } + return result; +} + +template inline uint32_t SYSpSortedVector::index(const T* t) const { + SPW_ASSERT(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + return index; + } else { + return SYS_NPOS; + } +} + +template inline bool SYSpSortedVector::contains(const T* t) const { + SPW_ASSERT(isSorted()); + uint32_t index; + return bsearch(*t, index, 0); +} + +template inline T* SYSpSortedVector::find(const T* t) const { + SPW_ASSERT(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + return (*this)[index]; + } else { + return 0; + } +} + +template inline void SYSpSortedVector::insert(T* t) { + SPW_ASSERT(isSorted()); + uint32_t index; + bsearch(*t, index, 2); + insertAt(index, t); + SPW_ASSERT(isSorted()); +} + +template inline T* SYSpSortedVector::remove(const T* t) { + SPW_ASSERT(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + T* result = (*this)[index]; + this->removeAt(index); + SPW_ASSERT(isSorted()); + return result; + } else { + return 0; + } +} + +template inline void SYSpSortedVector::clearAndDestroy() { + uint32_t i = 0; + for (i = 0; i < this->n_; ++i) { + delete (*this)[i]; + } + this->clear(); +} + +template inline bool SYSpSortedVector::operator ==(const SYSpSortedVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!(*(*this)[i] == *right[i])) { + return false; + } + } + return true; +} + +#ifndef NDEBUG +template inline bool SYSpSortedVector::isSorted() const { + if (this->n_ < 2) { + return true; + } + for (uint32_t index = 0; index < this->n_ - 1; index++) { + if (!(*((*this)[index]) < *((*this)[index + 1])) && !(*((*this)[index]) == *((*this)[index + 1]))) { + return false; + } + } + return true; +} +#endif + +// +// SYSlarray: single linked list of arrays. 
+// +template > class SYSlarray: private SYSslist, public A { +private: + + uint32_t length_; + +public: + + SYSlarray() : length_(0) { + } + + ~SYSlarray(); + + const T& operator[](const uint32_t index) const; + + T& operator[](const uint32_t index); + + void append(const T& t); + + T removeLast(); + + void clear(); + + uint32_t length() const; +}; + +template inline const T& SYSlarray::operator[](const uint32_t index) const { + SPW_ASSERT(index < length_); + SYSslistIterator iterator(*const_cast*>(static_cast*>(this))); + const uint32_t pos = index / G; + uint32_t i = 0; + while (i++ <= pos && ++iterator) { + } + T* a = iterator.key(); + return a[index % G]; +} + +template inline T& SYSlarray::operator[](const uint32_t index) { + SPW_ASSERT(index < length_); + SYSslistIterator iterator(*this); + const uint32_t pos = index / G; + uint32_t i = 0; + while (i++ <= pos && ++iterator) { + } + T* a = iterator.key(); + return a[index % G]; +} + +template inline void SYSlarray::append(const T& t) { + T* a; + if (length_ % G == 0) { + a = this->build(G); + SYSslist::append(a); + } else { + a = SYSslist::last(); + } + a[length_++ % G] = t; +} + +template inline T SYSlarray::removeLast() { + SPW_ASSERT(length_ > 0); + T result = (*this)[length_ - 1]; + --length_; + if (length_ % G == 0) { + T* a = SYSslist::removeAt(length_ / G); + this->destroy(a); + } + return result; +} + +template inline void SYSlarray::clear() { + SYSslistIterator iterator(*this); + while (++iterator) { + this->destroy(iterator.key()); + } + SYSslist::clear(); + length_ = 0; +} + +template inline SYSlarray::~SYSlarray() { + clear(); +} + +template inline uint32_t SYSlarray::length() const { + return length_; +} + +// +// SYSlvector: vector made of multiple small blocks to limit memory usage and fragmentation. +// Allows concurrent reads and appends. +// +template > class SYSlvector: public A { +private: + + SYSlarray array_; + uint32_t length_; + +protected: + + void resize(const uint32_t length); + +public: + + static const int BLOCK_SIZE = 2048; + + SYSlvector(); + + ~SYSlvector(); + + SYSlvector(const SYSlvector& right); + + SYSlvector& operator =(const SYSlvector& right); + + void clear(); + + uint32_t length() const; + + void append(const T& t); + + const T& operator[](const uint32_t index) const; + + const T& first() const; + + const T& last() const; + + T& operator[](const uint32_t index); + + void shrink(const uint32_t length); + + int64_t getSize() const; +}; + +template inline SYSlvector::SYSlvector() : + length_(0) { +} + +template inline void SYSlvector::clear() { + for (uint32_t i = 0; i < array_.length(); ++i) { + this->destroy(array_[i]); + } + array_.clear(); + this->resetCount(); + length_ = 0; +} + +template inline SYSlvector::~SYSlvector() { + clear(); +} + +template inline uint32_t SYSlvector::length() const { + return length_; +} + +template inline void SYSlvector::resize(const uint32_t length) { + const uint32_t oldN = array_.length(); + const uint32_t newN = length == 0 ? 
0 : (length + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (oldN < newN) { + for (uint32_t i = oldN; i < newN; ++i) { + array_.append(this->build(BLOCK_SIZE)); + } + } else if (oldN > newN) { + for (uint32_t i = newN; i < oldN; ++i) { + this->destroy(array_.removeLast()); + } + if (newN == 0) { + array_.clear(); + this->resetCount(); + } + } + length_ = length; +} + +template inline void SYSlvector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline void SYSlvector::append(const T& t) { + const uint32_t n = length_ / BLOCK_SIZE; + if (n == array_.length()) { + array_.append(this->build(BLOCK_SIZE)); + } + array_[n][length_++ % BLOCK_SIZE] = t; +} + +template inline const T& SYSlvector::operator[](const uint32_t index) const { + SPW_ASSERT(index < length_); + const T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline const T& SYSlvector::first() const { + return (*this)[0]; +} + +template inline const T& SYSlvector::last() const { + return (*this)[length_ - 1]; +} + +template inline T& SYSlvector::operator[](const uint32_t index) { + SPW_ASSERT(index < length_); + T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline SYSlvector& SYSlvector::operator =(const SYSlvector& right) { + if (this != &right) { + clear(); + for (uint32_t i = 0; i < right.length_; ++i) { + append(right[i]); + } + } + return *this; +} + +template inline SYSlvector::SYSlvector(const SYSlvector& right) : + length_(0) { + *this = right; +} + +template inline int64_t SYSlvector::getSize() const { + return length() * sizeof(A); +} + +// +// SYSbitVector: bit vector. +// Allows concurrent reads and appends. +// +template class SYSbitVector: private SYSlvector { + +private: + + uint32_t size_; + +protected: + + void resize(const uint32_t n); + +public: + + SYSbitVector(); + SYSbitVector(const SYSbitVector& right); + ~SYSbitVector(); + + // accessors + bool isEmpty() const; + uint32_t length() const; + bool operator [](const uint32_t index) const; + int64_t getSize() const; + + // operations + void clearBit(const uint32_t offset); + void setBit(const uint32_t offset); + void clear(); + void shrink(const uint32_t length); + + // operators + SYSbitVector& operator =(const SYSbitVector& right); +}; + +template inline bool SYSbitVector::isEmpty() const { + return size_ == 0; +} + +template inline uint32_t SYSbitVector::length() const { + return size_; +} + +template inline void SYSbitVector::clearBit(const uint32_t offset) { + SPW_ASSERT(offset < length()); + uint64_t* p = &SYSlvector::operator[](offset >> 6); + *p &= ~(1ULL << (offset & 63ULL)); +} + +template inline void SYSbitVector::resize(const uint32_t n) { + uint32_t oldN = SYSlvector::length(); + SYSlvector::resize((n + 63) / 64); + const uint32_t newN = SYSlvector::length(); + + // Reset added words. + while (oldN < newN) { + SYSlvector::operator[](oldN++) = 0; + } + + // Reset added bits. 
+ uint32_t nbits = 63 - (size_ % 64); + while (size_ < n && nbits > 0) { + clearBit(size_++); + nbits--; + } + size_ = n; +} + +template inline void SYSbitVector::setBit(const uint32_t offset) { + if (offset >= length()) { + resize(offset + 1); + } + uint64_t* p = &SYSlvector::operator[](offset >> 6); + *p |= (1ULL << (offset & 63ULL)); +} + +template inline SYSbitVector::SYSbitVector() : size_(0) { +} + +template inline void SYSbitVector::clear() { + SYSlvector::clear(); + size_ = 0; +} + +template inline SYSbitVector& SYSbitVector::operator =(const SYSbitVector& right) { + if (this != &right) { + SYSlvector::operator =(right); + size_ = right.size_; + } + return *this; +} + +template inline SYSbitVector::SYSbitVector(const SYSbitVector& right) { + *this = right; +} + +template inline bool SYSbitVector::operator[](const uint32_t index) const { + SPW_ASSERT(index < length()); + return SYSlvector::operator[](index / 64) & (1ULL << (index % 64)); +} + +template inline SYSbitVector::~SYSbitVector() { + clear(); +} + +template inline void SYSbitVector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline int64_t SYSbitVector::getSize() const { + return SYSlvector::getSize(); +} + +// +// SYSxvector: vector made of multiple small blocks to limit memory usage and fragmentation. +// Allows concurrent reads, but without appends. +// +template > class SYSxvector: public A { +private: + + SYSpVector array_; + uint32_t length_; + +protected: + + void resize(const uint32_t length); + +public: + + static const int BLOCK_SIZE = 2048; + + SYSxvector(); + + ~SYSxvector(); + + SYSxvector(const SYSxvector& right); + + SYSxvector& operator =(const SYSxvector& right); + + void clear(); + + uint32_t length() const; + + void append(const T& t); + + const T& operator[](const uint32_t index) const; + + const T& first() const; + + const T& last() const; + + T& operator[](const uint32_t index); + + void shrink(const uint32_t length); + + int64_t getSize() const; +}; + +template inline SYSxvector::SYSxvector() : + length_(0) { +} + +template inline void SYSxvector::clear() { + for (uint32_t i = 0; i < array_.entries(); ++i) { + this->destroy(array_[i]); + } + array_.clear(); + this->resetCount(); + length_ = 0; +} + +template inline SYSxvector::~SYSxvector() { + clear(); +} + +template inline uint32_t SYSxvector::length() const { + return length_; +} + +template inline void SYSxvector::resize(const uint32_t length) { + const uint32_t oldN = array_.length(); + const uint32_t newN = length == 0 ? 
0 : (length + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (oldN < newN) { + for (uint32_t i = oldN; i < newN; ++i) { + array_.append(this->build(BLOCK_SIZE)); + } + } else if (oldN > newN) { + for (uint32_t i = newN; i < oldN; ++i) { + this->destroy(array_.last()); + array_.removeLast(); + } + if (newN == 0) { + array_.clear(); + this->resetCount(); + } + } + length_ = length; +} + +template inline void SYSxvector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline void SYSxvector::append(const T& t) { + const uint32_t n = length_ / BLOCK_SIZE; + if (n == array_.length()) { + array_.append(this->build(BLOCK_SIZE)); + } + array_[n][length_++ % BLOCK_SIZE] = t; +} + +template inline const T& SYSxvector::operator[](const uint32_t index) const { + SPW_ASSERT(index < length_); + const T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline const T& SYSxvector::first() const { + return (*this)[0]; +} + +template inline const T& SYSxvector::last() const { + return (*this)[length_ - 1]; +} + +template inline T& SYSxvector::operator[](const uint32_t index) { + SPW_ASSERT(index < length_); + T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline SYSxvector& SYSxvector::operator =(const SYSxvector& right) { + if (this != &right) { + clear(); + for (uint32_t i = 0; i < right.length_; ++i) { + append(right[i]); + } + } + return *this; +} + +template inline SYSxvector::SYSxvector(const SYSxvector& right) : length_(0) { + *this = right; +} + +template inline int64_t SYSxvector::getSize() const { + return length() * sizeof(A); +} + +} + +#endif /* #ifndef _spw_api_vec_h_ */ diff --git a/storage/sparrow/api_test/CMakeLists.txt b/storage/sparrow/api_test/CMakeLists.txt new file mode 100644 index 000000000000..1409f8e08a44 --- /dev/null +++ b/storage/sparrow/api_test/CMakeLists.txt @@ -0,0 +1,35 @@ +SET(SPARROW_API_TEST_SOURCES + exception.cc + exception.h + sparrow_api_test.cpp + all_types.h + all_types.cpp + column_subset.h + column_subset.cpp + many_partitions.h + many_partitions.cpp + too_many_columns.h + too_many_columns.cpp + vl.h + vl.cpp + column_optim.h + column_optim.cpp + sql.h + sql.cpp + utils.h + common.h + common.cpp + errors.h + errors.cpp) + + +REMOVE_DEFINITIONS(-DSPARROW_API_EXPORTS) + +MYSQL_ADD_EXECUTABLE(sparrow_api_test ${SPARROW_API_TEST_SOURCES} + LINK_LIBRARIES sparrowapi) + +TARGET_INCLUDE_DIRECTORIES(sparrow_api_test PRIVATE ${CMAKE_SOURCE_DIR}/storage/sparrow/api/include) + +# Actually, sparrowapi embeds mysqlclient, but it's clearer if we specify explicitly the mysqlclient. 
+TARGET_LINK_LIBRARIES(sparrow_api_test sparrowapi mysqlclient) + diff --git a/storage/sparrow/api_test/all_types.cpp b/storage/sparrow/api_test/all_types.cpp new file mode 100644 index 000000000000..1c6a83cd8332 --- /dev/null +++ b/storage/sparrow/api_test/all_types.cpp @@ -0,0 +1,408 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "all_types.h" +#include "my_sys.h" +#include "my_systime.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Create TEST Table + +void TestAlltypes::run() { + try + { + const char* table_name = "table_1"; + + printf("Creating table %s...", table_name); + AutoPtr table( createTable(table_name) ); + printf("OK\n"); + + runMasterFileTest( table.get() ); + + runGetTableTest( table.get() ); + + sendSampleData( table.get() ); + sendErrorData( table.get() ); + sendDataFlow( table.get() ); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestAlltypes::runMasterFileTest( const Table* table ) { + try + { + printf("Retrieving Master file for %s...", table->getTableName()); + const Master* master = connect_->getMasterFile( table->getDatabaseName(), table->getTableName() ); + if ( !master ) + throw MyException::create( false, "Failed to get Master file." ); + // Do some processing on the retrieved Master File information + // ... + + // TODO: use connect->releaseXXX() instead + delete master; master = NULL; + printf("OK\n"); + + } catch ( const MyException& e ) { + printf( "Failed to get Master file: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestAlltypes::runGetTableTest( const Table* table ) { + try + { + printf("Getting Table object for %s...", table->getTableName()); + const Table* tbl = connect_->getTable( table->getDatabaseName(), table->getTableName() ); + if ( !tbl ) + throw MyException::create( false, "Failed to get Table description." ); + delete tbl; tbl = NULL; + printf("OK\n"); + +#ifdef TEST_COALESCING + printf( "Disabling coalescing..." ); + connect->disableCoalescing(10); + printf( "done" ); + + getchar(); + + printf( "Enabling coalescing..." ); + connect->disableCoalescing(0); + printf( "done" ); +#endif // TEST_COALESCING + + + } catch ( const MyException& e ) { + printf( "Failed to get Table: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestAlltypes::runDisableCoalescingGlobTest(bool loop) { + try + { + if (!loop) + { + printf( "Disabling coalescing..." ); + connect_->disableCoalescing(10); + printf( "OK\n" ); + + getchar(); + + printf( "Enabling coalescing..." ); + connect_->disableCoalescing(0); + printf( "OK\n" ); + } else + { + do + { + connect_->disableCoalescing(10); + my_sleep(1000000); // 1sec + connect_->disableCoalescing(0); + } while (true); + } + } catch ( const MyException& e ) { + printf( "Failed to get Table: %s : %s\n", e.getText(), errmsg() ); + } +} + + +void TestAlltypes::runDisableCoalescingSchemaTest(const char* schema, bool loop) +{ + if (schema == NULL || strlen(schema) == 0) + return; + + try + { + if (!loop) + { + printf( "Disabling coalescing for %s...", schema ); + connect_->disableCoalescing(10, schema); + printf( "OK\n" ); + + getchar(); + + printf( "Enabling coalescing..." 
); + connect_->disableCoalescing(0, schema); + printf( "OK\n" ); + } + else + { + do { + connect_->disableCoalescing(1, schema); + + my_sleep(1000000); + + connect_->disableCoalescing(0, schema); + } while (true); + } + + } catch ( const MyException& e ) { + printf( "Failed to get Table: %s : %s\n", e.getText(), errmsg() ); + } +} + + +Table* TestAlltypes::createTable( const char* table_name ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 24*3600*1000; + uint64_t coalescingPeriod = 3600*1000; + uint32_t aggregationPeriod = 300; + uint64_t defaultWhere = 3600*24*1000; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + table->setDefaultWhere(defaultWhere); + + // Columns + int col = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + table->appendColumn( "slotId", col++, COL_LONG, 0, COL_UNSIGNED ); + table->appendColumn( "status", col++, COL_BYTE, 0, COL_UNSIGNED ); + table->appendColumn( "samples", col++, COL_SHORT, 0, COL_UNSIGNED ); + table->appendColumn( "value", col++, COL_DOUBLE ); + table->appendColumn( "name", col++, COL_STRING, 32 ); + table->appendColumn( "status2", col++, COL_BYTE, 0, COL_UNSIGNED ); + table->appendColumn( "name_nullable", col++, COL_STRING, 10, COL_NULLABLE ); + table->appendColumn( "status3", col++, COL_BYTE, 0, COL_UNSIGNED ); + table->appendColumn( "blob", col++, COL_BLOB, 32 ); + table->appendColumn( "status4", col++, COL_BYTE, 0, COL_UNSIGNED ); + table->appendColumn( "blob_nullable", col++, COL_BLOB, 255, COL_NULLABLE ); + table->appendColumn( "status5", col++, COL_BYTE, 0, COL_UNSIGNED ); + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_2", 0, true )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + table->addColToIndex( indxId, 1 ); + if ( (indxId=table->appendIndex( "index_3", 4, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_4", 2, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_5", 3, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_6", 5, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_7", 6, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + + // Foreign keys + table->appendFK( "fk_1", 1, "DB2", "TEST2", "COL2" ); + + // DNS configuration + int indxDnsEntry; + indxDnsEntry = table->addDnsEntry( -1 ); + table->addDnsServer( indxDnsEntry, "dns1", 12, "10.12.14.16", 2500 ); + table->addDnsServer( indxDnsEntry, "dns2", 15, "20.22.24.26", 4500 ); + + indxDnsEntry = table->addDnsEntry( 0 ); + table->addDnsServer( indxDnsEntry, "dns3", 13, "10.12.14.36", 3500 ); + table->addDnsServer( indxDnsEntry, "dns4", 16, "20.22.24.46", 5500 ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", 
sql_params_.getSchema(), table_name, res ); + + return table; +} + +void TestAlltypes::sendSampleData( const Table* table ) { + + const uint32_t buffSize = 64*1024*512; + + const int nb_iter = 2; + for ( int k=0; kgetTableName(), k+1); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; i spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + const uint32_t nbRows = 4; + MyRow data[nbRows]; + + data[0] = MyRow( now+100, 0, 0x12, 0, "", NULL, 0.0, (uint8_t*)blob, 0, NULL, 0 ); + data[1] = MyRow( now+10, 0, 0x12, 1, "", "", 0.0, (uint8_t*)blob, 1, NULL, 0 ); + data[2] = MyRow( now+1234, 1, 0x12, 0x1234, "a", "a", 1.1, (uint8_t*)blob, 1, (uint8_t*)blob, 1 ); + data[3] = MyRow( now+0x100, 1, 0xCF, 0xFFFF, "abcdefghij", "abcdefghij", 1.1, (uint8_t*)blob, 10, (uint8_t*)blob, 10 ); + + + // Copy the data to the Sparrow Buffer, one row after another + for ( uint32_t i=0; iaddRow( data[i] ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("OK\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } + //getchar(); + } +} + +void TestAlltypes::sendErrorData( const Table* table ) { + + // Error cases + const uint32_t buffSize = 64*1024*512; + try + { + printf("Sending erroneous data samples to table %s [1]...", table->getTableName()); + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + //const int blobLen = 11; // columns length + 1 + //uint8_t blob[blobLen]; + // for ( int i=0; iaddRow( data[i] ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("ASSERT!\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } + + + // Error cases + try + { + printf("Sending erroneous data samples to table %s [2]...", table->getTableName()); + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; iaddRow( data[i] ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("ASSERT!\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! 
%s : %s\n", e.getText(), errmsg() ); + } + +} + +void TestAlltypes::sendDataFlow( const Table* table ) +{ + const uint buffSize = 64*1024*512; + const uint nbRows = 10000; + try + { + printf("Sending data flow to table %s...", table->getTableName()); + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; iaddRow( row ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("OK\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + + diff --git a/storage/sparrow/api_test/all_types.h b/storage/sparrow/api_test/all_types.h new file mode 100644 index 000000000000..7b83e8c33c2e --- /dev/null +++ b/storage/sparrow/api_test/all_types.h @@ -0,0 +1,244 @@ +#ifndef _spw_test_all_types_h +#define _spw_test_all_types_h + +#include "common.h" + +using namespace Sparrow; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow : public SparrowRow +{ +public: + uint64_t timestamp_; + uint64_t slotId_; + uint8_t status_; + uint16_t samples_; + char* name_; // string + char* name_null_; // string + double value_; + uint8_t* blob_; + int blobLen_; + uint8_t* blob_null_; + int blob_null_Len_; + +public: + MyRow(); + MyRow(uint64_t, uint64_t, uint8_t, uint16_t, const char*, const char*, double, const uint8_t*, int, const uint8_t*, int); + MyRow(const MyRow&); + ~MyRow(); + + const MyRow& operator =(const MyRow&); + + int decode(SparrowBuffer* buffer, void* dummy) const override; +}; + +inline MyRow::MyRow() + : timestamp_(0), slotId_(0), status_(0), samples_(0), name_(NULL), name_null_(NULL), + value_(0.0), blob_(NULL), blobLen_(0), blob_null_(NULL), blob_null_Len_(0) +{ +} + +inline MyRow::MyRow( uint64_t timestamp, uint64_t slotId, uint8_t status, uint16_t samples, + const char* name, const char* name_null, double value, + const uint8_t* blob, int blobLen, const uint8_t* blob_null, int blob_null_Len) + : timestamp_(timestamp), slotId_(slotId), status_(status), samples_(samples), name_(NULL), name_null_(NULL), + value_(value), blob_(NULL), blobLen_(0), blob_null_(NULL), blob_null_Len_(0) +{ + if ( name != NULL ) { + name_ = new char[strlen(name)+1]; + strcpy( name_, name ); + } + if ( name_null != NULL ) { + name_null_ = new char[strlen(name_null)+1]; + strcpy( name_null_, name_null ); + } + if ( blob != NULL ) { + int len = blobLen > 0 ? blobLen : 1; + blob_ = new uint8_t[len]; + memcpy( blob_, blob, blobLen ); + blobLen_ = blobLen; + } + if ( blob_null != NULL ) { + int len = blob_null_Len > 0 ? 
blob_null_Len : 1; + blob_null_ = new uint8_t[len]; + memcpy( blob_null_, blob_null, blob_null_Len ); + blob_null_Len_ = blob_null_Len; + } +} + + +inline MyRow::MyRow( const MyRow& right ) +{ + timestamp_ = right.timestamp_; + slotId_ = right.slotId_; + status_ = right.status_; + samples_ = right.samples_; + value_ = right.value_; + + if ( right.name_ != NULL ) { + name_ = new char[strlen(right.name_)+1]; + strcpy( name_, right.name_ ); + } else { + name_ = NULL; + } + + if ( right.name_null_ != NULL ) { + name_null_ = new char[strlen(right.name_null_)+1]; + strcpy( name_null_, right.name_null_ ); + } else { + name_null_ = NULL; + } + + if ( right.blob_ != NULL ) { + int len = right.blobLen_ > 0 ? right.blobLen_ : 1; + blob_ = new uint8_t[len]; + memcpy( blob_, right.blob_, right.blobLen_ ); + blobLen_ = right.blobLen_; + } else { + blob_ = NULL; + blobLen_ = 0; + } + + if ( right.blob_null_ != NULL ) { + int len = right.blob_null_Len_ > 0 ? right.blob_null_Len_ : 1; + blob_null_ = new uint8_t[len]; + memcpy( blob_null_, right.blob_null_, right.blob_null_Len_ ); + blob_null_Len_ = right.blob_null_Len_; + } else { + blob_null_ = NULL; + blob_null_Len_ = 0; + } +} + +inline const MyRow& MyRow::operator = ( const MyRow& right ) +{ + if ( this == &right ) + return *this; + + timestamp_ = right.timestamp_; + slotId_ = right.slotId_; + status_ = right.status_; + samples_ = right.samples_; + value_ = right.value_; + + if ( name_ != NULL ) { + delete [] name_; name_ = NULL; + } + if ( right.name_ != NULL ) { + name_ = new char[strlen(right.name_)+1]; + strcpy( name_, right.name_ ); + } else { + name_ = NULL; + } + + if ( name_null_ != NULL ) { + delete [] name_null_; + } + if ( right.name_null_ != NULL ) { + name_null_ = new char[strlen(right.name_null_)+1]; + strcpy( name_null_, right.name_null_ ); + } else { + name_null_ = NULL; + } + + if ( blob_ != NULL ) { + delete [] blob_; blob_ = NULL; + blobLen_ = 0; + } + if ( right.blob_ != NULL ) { + int len = right.blobLen_ > 0 ? right.blobLen_ : 1; + blob_ = new uint8_t[len]; + memcpy( blob_, right.blob_, right.blobLen_ ); + blobLen_ = right.blobLen_; + } else { + blob_ = NULL; + blobLen_ = 0; + } + + if ( blob_null_ != NULL ) { + delete [] blob_null_; + } + if ( right.blob_null_ != NULL ) { + int len = right.blob_null_Len_ > 0 ? 
right.blob_null_Len_ : 1; + blob_null_ = new uint8_t[len]; + memcpy( blob_null_, right.blob_null_, right.blob_null_Len_ ); + blob_null_Len_ = right.blob_null_Len_; + } else { + blob_null_ = NULL; + blob_null_Len_ = 0; + } + + return *this; +} + +inline MyRow::~MyRow() +{ + if ( name_ != NULL ) { + delete [] name_; + } + if ( name_null_ != NULL ) { + delete [] name_null_; + } + if ( blob_ != NULL ) { + delete [] blob_; + } + if ( blob_null_ != NULL ) { + delete [] blob_null_; + } +} + +inline int MyRow::decode( SparrowBuffer* buffer, void* /*dummy*/ ) const { + + int col = 0; + int res; + if ( (res=buffer->addLong( col++, timestamp_ )) != 0 ) return res; + if ( (res=buffer->addLong( col++, slotId_ )) != 0 ) return res; + if ( (res=buffer->addByte( col++, status_ )) != 0 ) return res; + if ( (res=buffer->addShort( col++, samples_ )) != 0 ) return res; + if ( (res=buffer->addDouble( col++, value_ )) != 0 ) return res; + if ( (res=buffer->addString( col++, name_ )) != 0 ) return res; + if ( (res=buffer->addByte( col++, status_ )) != 0 ) return res; + if ( name_null_ == NULL ) { + buffer->addNull( col++ ); + } else { + if ( (res=buffer->addString( col++, name_null_ )) != 0 ) return res; + } + if ( (res=buffer->addByte( col++, status_ )) != 0 ) return res; + if ( (res=buffer->addBlob( col++, blob_, blobLen_ )) != 0 ) return res; + if ( (res=buffer->addByte( col++, status_ )) != 0 ) return res; + if ( blob_null_ == NULL ) { + buffer->addNull( col++ ); + } else { + if ( (res=buffer->addBlob( col++, blob_null_, blob_null_Len_ )) != 0 ) return res; + } + if ( (res=buffer->addByte( col++, status_ )) != 0 ) return res; + + return 0; +} + +//----------------------------------------------------------------------------- + +class TestAlltypes : public Test { + friend class TestManyPartitions; + friend class TestErrors; +public: + TestAlltypes(const SQLparams& sql_params) : Test(sql_params) {;} + + void run(); + +public: + void runMasterFileTest(const Table*); + void runGetTableTest(const Table* table); + void runDisableCoalescingGlobTest(bool loop=false); + void runDisableCoalescingSchemaTest(const char* schema, bool loop=false); + Table* createTable(const char* table_name); + void sendSampleData(const Table* table); + void sendErrorData(const Table* table); + void sendDataFlow(const Table* table); +}; + +#endif // _spw_test_all_types_h \ No newline at end of file diff --git a/storage/sparrow/api_test/column_optim.cpp b/storage/sparrow/api_test/column_optim.cpp new file mode 100644 index 000000000000..e06b1c1c9fbf --- /dev/null +++ b/storage/sparrow/api_test/column_optim.cpp @@ -0,0 +1,512 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "column_optim.h" + +#include "my_sys.h" +#include "my_systime.h" +#include "sql.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow6 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow6 : public SparrowRow +{ +private: + uint64_t ts_; + mutable int counter_; + int null_col_; + int nb_col_; + +public: + MyRow6(int null_col, int nb_col) : counter_(0), null_col_(null_col), nb_col_(nb_col) { + time_t ltime; + time( <ime ); + ts_ = ltime*1000; + } + + int decode(SparrowBuffer* buffer, void* /*dummy*/) const override; +}; + +int MyRow6::decode(SparrowBuffer* buffer, void* /*dummy*/) const +{ + counter_++; + + int col = 0; + int res = 0; + if ((res=buffer->addLong(col++, ts_)) != 0) + return res; + if (null_col_ 
<= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + if ((res=buffer->addByte(col++, (unsigned char)counter_)) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + if ((res=buffer->addShort(col++, (unsigned short)counter_)) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + if ((res=buffer->addInt(col++, counter_)) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + if ((res=buffer->addLong(col++, counter_)) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + if ((res=buffer->addDouble(col++, counter_)) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + uint8_t blob[4]; + blob[0] = counter_>>24; + blob[1] = (counter_>>16)&0xFF; + blob[2] = (counter_>>8)&0xFF; + blob[3] = counter_&0xFF; + if ((res=buffer->addBlob(col++, blob, sizeof(blob))) != 0) + return res; + } + + if (null_col_ <= col && (null_col_+nb_col_) > col) { + buffer->addNull(col++); + } else { + char str[32]; + sprintf(str, "%u", counter_); + if ((res=buffer->addString(col++, str)) != 0) + return res; + } + + return 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow7 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow7 : public SparrowRow +{ +private: + uint64_t ts_; + uint8_t* buffer_; + uint length_; + +public: + MyRow7(uint64_t ts, uint8_t* buffer, uint length) : ts_(ts), buffer_(buffer), length_(length) {;} + + int decode(SparrowBuffer* buffer, void* /*dummy*/) const override; +}; + +int MyRow7::decode(SparrowBuffer* buffer, void* /*dummy*/) const +{ + int col = 0; + buffer->addLong(col++, ts_); + buffer->addBlob(col++, buffer_, length_); + buffer->addInt(col++, 1); + + return 0; +} + + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TestColumnOptim + +TestColumnOptim::TestColumnOptim(const SQLparams& sql_params) : Test(sql_params) { + flushInterval_ = getFlushInterval(); +} + +void TestColumnOptim::runSimple() { + try + { + const char* table_name = "table_null_cols"; + const uint nbRows = 10; + const uint null_col = 1; + + // Initialize environment + dropTable(table_name); + + printf("Creating table %s. Nulled columns is %u\n", table_name, null_col); + AutoPtr
table( createTable(table_name) ); + sendData(table.get(), nbRows, null_col, 1); + waitForFlush(); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +/* Checks that the partition can still be read correctly after adding or removing columns */ +void TestColumnOptim::runtAlterTests(const int null_col) { + try + { + const char* table_name = "table_alter"; + const uint nbRows = 10; + + // Initialize environment + dropTable(table_name); + + printf("Creating table %s. Null column %u\n", table_name, null_col); + AutoPtr
table( createTable(table_name) ); + sendData(table.get(), nbRows, null_col, 1); + waitForFlush(); + printf("Check data can be read correctly...\n"); + getchar(); + + printf("Inserting column after %u\n", null_col); + table = createTable(table_name, -1, null_col+1); + printf("Check data can still be read correctly...\n"); + getchar(); + + printf("Inserting column before %u\n", null_col); + table = createTable(table_name, -1, null_col-1); + printf("Check data can still be read correctly...\n"); + getchar(); + + printf("Dropping column after %u\n", null_col); + table = createTable(table_name, null_col+1); + printf("Check data can still be read correctly...\n"); + getchar(); + + printf("Dropping column before %u\n", null_col); + table = createTable(table_name, null_col-1); + printf("Check data can still be read correctly...\n"); + getchar(); + + printf("Dropping null column %u\n", null_col); + table = createTable(table_name, null_col); + printf("Check data can still be read correctly...\n"); + getchar(); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestColumnOptim::runtAlterTests() { + runtAlterTests(2); // Data in column 2 is nulled + runtAlterTests(7); // Data in column 7 is nulled +} + +/* Creates partitions with different null columns. Check all data can be read correctly */ +void TestColumnOptim::runMultiPartTests() { + try + { + const char* table_name = "table_multi_part"; + const uint nbRows = 10; + + // Initialize environment + dropTable(table_name); + + printf("Creating table %s. Inserting valid data in all columns\n", table_name); + AutoPtr
table( createTable(table_name) ); + uint nbCols = table->getNbColumns() - 1; + + sendData(table.get(), nbRows, 0, 0); + waitForFlush(); + + for (uint n=1; n<=nbCols; ++n) { + for (uint i=0; i<=nbCols-n; ++i) { + printf("Inserting Null data in columns %u\n", i); + sendData(table.get(), nbRows, i+1, n); + waitForFlush(); + } + } + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestColumnOptim::runCoalescingTests(const uint nbRows) { + try + { + const char* table_name = "table_coalesc"; + + // Initialize environment + dropTable(table_name); + + printf("Creating table %s.\n", table_name); + AutoPtr
table( createTable(table_name) ); + + for (uint i=0; i<2; ++i) { + printf("Inserting valid data in all columns\n"); + sendData(table.get(), nbRows, 0, 0); + waitForFlush(); + + printf("Inserting Null data in columns 1\n"); + sendData(table.get(), nbRows, 1, 1); + waitForFlush(); + + printf("Inserting Null data in columns 7\n"); + sendData(table.get(), nbRows, 7, 1); + waitForFlush(); + } + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestColumnOptim::runDNSTests() { + try + { + const char* table_name = "table_dns"; + const uint nbRows = 10; + + // Initialize environment + dropTable(table_name); + + printf("Creating table %s. Inserting valid data in all columns\n", table_name); + AutoPtr
table( createTableDNS(table_name) ); + sendDataDNS(table.get(), nbRows); + waitForFlush(); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestColumnOptim::runVolumeTests() { + runCoalescingTests( 1000000 ); +} + + + +// Create TEST Table + +Table* TestColumnOptim::createTable( const char* table_name, int droppedCol, int insertCol, uint64_t coalescingPeriod ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 24*3600*1000; + uint32_t aggregationPeriod = 300; + uint64_t defaultWhere = 3600*24*1000; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + table->setDefaultWhere(defaultWhere); + + // Columns + int col = 0, i = 0, last_indexable = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + ++i; + if (insertCol == i) { + table->appendColumn( "insert_byte", col++, COL_BYTE, 0, COL_UNSIGNED | COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_byte", col++, COL_BYTE, 0, COL_UNSIGNED | COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_short", col++, COL_SHORT, 0, COL_UNSIGNED | COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_short", col++, COL_SHORT, 0, COL_UNSIGNED | COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_int", col++, COL_INT, 0, COL_UNSIGNED | COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_int", col++, COL_INT, 0, COL_UNSIGNED | COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_long", col++, COL_LONG, 0, COL_UNSIGNED | COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_long", col++, COL_LONG, 0, COL_UNSIGNED | COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_double", col++, COL_DOUBLE, 0, COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_double", col++, COL_DOUBLE, 0, COL_NULLABLE ); + } + last_indexable = col - 1; + ++i; + if (insertCol == i) { + table->appendColumn( "insert_blob", col++, COL_BLOB, 0, COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_blob", col++, COL_BLOB, 0, COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_str", col++, COL_STRING, 0, COL_NULLABLE ); + } + if (droppedCol != i) { + table->appendColumn( "value_str", col++, COL_STRING, 0, COL_NULLABLE ); + } + ++i; + if (insertCol == i) { + table->appendColumn( "insert_str_last", col++, COL_STRING, 0, COL_NULLABLE ); + } + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + + for (int j=0; jappendIndex( index_name, j, true )) < 0 ) + throw MyException::create( false, "Failed to create index %s", index_name ); + table->addColToIndex( indxId, j+1 ); + } + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + +Table* TestColumnOptim::createTableDNS( const char* table_name, uint64_t coalescingPeriod ) +{ + Table* 
table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 24*3600*1000; + uint32_t aggregationPeriod = 300; + uint64_t defaultWhere = 3600*24*1000; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + table->setDefaultWhere(defaultWhere); + + // Columns + int col = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + table->appendColumn( "IP", col++, COL_BLOB, 0, COL_IP_ADDRESS | COL_NULLABLE ); + table->appendColumn( "dnsId", col++, COL_INT, 0, COL_DNS_IDENTIFIER | COL_NULLABLE ); + table->appendColumn( "lookup", col++, COL_STRING, 256, COL_IP_LOOKUP | COL_NULLABLE, 1 ); + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + + int indxDnsEntry; + indxDnsEntry = table->addDnsEntry( 1 ); + table->addDnsServer( indxDnsEntry, "frluldc03", 0, "10.1.26.218", 0 ); + table->addDnsServer( indxDnsEntry, "frluldc02", 0, "10.1.26.18", 0 ); + + indxDnsEntry = table->addDnsEntry( 2 ); + table->addDnsServer( indxDnsEntry, "frluldc02", 0, "10.1.26.18", 0 ); + table->addDnsServer( indxDnsEntry, "frluldc03", 0, "10.1.26.218", 0 ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + + +void TestColumnOptim::sendData( const Table* table, uint nbRows, int null_col, int nb_col ) +{ + uint32_t buffSize = 16*1024*512; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + printf("Inserting %u rows in table %s.\n", nbRows, table->getTableName()); + MyRow6 rows(null_col, nb_col); + for ( uint i=0; iaddRow(rows); + if ( res == SPW_API_BUFFER_FULL ) { + printf("Flushing buffers (%u%% done).\n", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + // This row is not complete. Insert it again in next batch. + i--; + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("Done.\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + + +void TestColumnOptim::sendDataDNS( const Table* table, uint nbRows ) +{ + uint32_t buffSize = 64*1024*512; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + time_t ltime; + time( <ime ); + uint64_t ts = ltime*1000; + + for ( uint i=1; iaddRow(rows); + if ( res < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + } + catch ( const MyException& e ) + { + printf( "Exception! 
%s : %s\n", e.getText(), errmsg() ); + } +} + +void TestColumnOptim::waitForFlush() { + printf("Waiting %us for partition flush...", flushInterval_); + my_sleep((flushInterval_+1)*1000000ULL); // We add one second to the theoretical flush period to be sure. + printf("Ok\n"); +} \ No newline at end of file diff --git a/storage/sparrow/api_test/column_optim.h b/storage/sparrow/api_test/column_optim.h new file mode 100644 index 000000000000..a204337f622a --- /dev/null +++ b/storage/sparrow/api_test/column_optim.h @@ -0,0 +1,35 @@ +#ifndef _spw_test_column_optim_h +#define _spw_test_column_optim_h + +#include "common.h" + +using namespace Sparrow; + +//----------------------------------------------------------------------------- + +class TestColumnOptim : public Test { +private: + uint flushInterval_; + +public: + TestColumnOptim(const SQLparams& sql_params); + //~TestColumnOptim(); + + void runSimple(); + void runMultiPartTests(); + void runtAlterTests(); + void runCoalescingTests(const uint nbRows=10); + void runDNSTests(); + void runVolumeTests(); + +private: + Table* createTable(const char* table_name, int droppedCol=-1, int insertCol=-1, uint64_t coalescingPeriod=3600*1000); + Table* createTableDNS(const char* table_name, uint64_t coalescingPeriod=3600*1000); + void sendData(const Table* table, uint nbRows, int null_col, int nb_col); + void sendDataDNS(const Table* table, uint nbRows); + + void runtAlterTests(const int null_col); + void waitForFlush(); +}; + +#endif // _spw_test_column_optim_h \ No newline at end of file diff --git a/storage/sparrow/api_test/column_subset.cpp b/storage/sparrow/api_test/column_subset.cpp new file mode 100644 index 000000000000..2baaead6c48d --- /dev/null +++ b/storage/sparrow/api_test/column_subset.cpp @@ -0,0 +1,430 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "column_subset.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow4 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow4 : public SparrowRow +{ +private: + uint64_t ts_; + testDefinition& params_; // determines the selection of columns +public: + MyRow4(const uint64_t& ts, testDefinition& params) : ts_(ts), params_(params) {;} + int decode(SparrowBuffer* buffer, void* dummy) const override; +}; + + +int MyRow4::decode(SparrowBuffer* buffer, void* dummy) const +{ + for (uint i=0; iaddLong(col, ts_); + break; + case 1: + res = buffer->addByte(col, -(int)params_.id_); + break; + case 2: + res = buffer->addByte(col, params_.id_); + break; + case 3: + res = buffer->addDouble(col, -(double)params_.id_); + break; + case 4: + res = buffer->addDouble(col, params_.id_); + break; + case 5: + res = buffer->addInt(col, -(int)params_.id_); + break; + case 6: + res = buffer->addInt(col, params_.id_); + break; + case 7: + res = buffer->addLong(col, -(int)params_.id_); + break; + case 8: + res = buffer->addLong(col, params_.id_); + break; + case 9: + { + char str[256]; + sprintf( str, "%d", params_.id_ ); + res = buffer->addString(col, str); + } + break; + case 10: + res = buffer->addShort(col, -(int)params_.id_); + break; + case 11: + res = buffer->addShort(col, params_.id_); + break; + case 12: + { + uint8_t blob[255]; + memset( blob, 0x12, sizeof(blob) ); + res = buffer->addBlob(col, blob, sizeof(blob)); + } + break; + } + if (res != 0) return res; + } + return 0; +} + 
+////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow5 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow5 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow5 : public SparrowRow +{ +private: + uint64_t ts_; + testDefinitionM& params_; // determines the selection of columns +public: + MyRow5(const uint64_t& ts, testDefinitionM& params) : ts_(ts), params_(params) {;} + int decode(SparrowBuffer* buffer, void* dummy) const override; +}; + + +int MyRow5::decode(SparrowBuffer* buffer, void* dummy) const +{ + buffer->addLong(0, ts_); // Timestamp column is always set to a valid value + for ( uint i=0; iaddDouble( params_.columns_[i], params_.columns_[i] ); + if ( res != 0 ) return res; + } + return 0; +} + +//----------------------------------------------------------------------------- + +void TestColumnSubset::run() { + try + { + insertSelectColumns( "table_11", true ); + insertSelectColumns( "table_12", false ); + insertSelectColumnsMassive( "table_13", true, 512 ); // Limit set by SPARROW_MAX_BIT_SIZE + insertSelectColumnsMassive( "table_14", false, 2000 ); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +Table* TestColumnSubset::createTable( const char* table_name, bool nullable ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 48*3600*1000; + uint64_t coalescingPeriod = 24*3600*1000; + uint32_t aggregationPeriod = 300; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + + // Columns + uint32_t flags = (nullable ? 
COL_NULLABLE : 0); + int col = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + table->appendColumn( "byte", col++, COL_BYTE, 0, flags); + table->appendColumn( "byte_u", col++, COL_BYTE, 0, flags | COL_UNSIGNED); + table->appendColumn( "double", col++, COL_DOUBLE, 0, flags); + table->appendColumn( "double_u", col++, COL_DOUBLE, 0, flags | COL_UNSIGNED); + table->appendColumn( "int", col++, COL_INT, 0, flags); + table->appendColumn( "int_u", col++, COL_INT, 0, flags | COL_UNSIGNED); + table->appendColumn( "long", col++, COL_LONG, 0, flags); + table->appendColumn( "long_u", col++, COL_LONG, 0, flags | COL_UNSIGNED); + table->appendColumn( "string", col++, COL_STRING, 32, flags); + table->appendColumn( "short", col++, COL_SHORT, 0, flags); + table->appendColumn( "short_u", col++, COL_SHORT, 0, flags | COL_UNSIGNED); + table->appendColumn( "blob", col++, COL_BLOB, 255, flags); + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_2", 0, true )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + table->addColToIndex( indxId, 1 ); + + // Foreign keys + table->appendFK( "fk_1", 1, "DB2", "TEST2", "COL2" ); + + // DNS configuration + int indxDnsEntry; + indxDnsEntry = table->addDnsEntry( -1 ); + table->addDnsServer( indxDnsEntry, "dns1", 12, "10.12.14.16", 2500 ); + table->addDnsServer( indxDnsEntry, "dns2", 15, "20.22.24.26", 4500 ); + + indxDnsEntry = table->addDnsEntry( 0 ); + table->addDnsServer( indxDnsEntry, "dns3", 13, "10.12.14.36", 3500 ); + table->addDnsServer( indxDnsEntry, "dns4", 16, "20.22.24.46", 5500 ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + +void TestColumnSubset::getColumnNames(ColumnNames* columns, Table* table, const testDefinition& params) { + for ( uint i=0; igetColumn( params.columns_[i] ); + columns->appendName( column.getName() ); + } +} + +void TestColumnSubset::testSelColumns( Table* table, testDefinition& params, int test_number ) +{ + // Insert no column + uint32_t buffSize = 64*1024; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + std::cout << "" << std::endl; + std::cout << "Inserting rows using config " << params << std::endl; + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + now += test_number; + + const uint nbRows = 1; // No need to create N times the same row + for (uint i=0; iaddRow( row ) < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer" ); + } + std::cout << "Filled buffer. Retrieving sub-set of columns." << std::endl; + + Sparrow::AutoPtr colNames(connect_->createColumnNames(params.columns_.size())); + getColumnNames( colNames.get(), table, params ); + { + std::cout << "Our column selection: {"; + bool first = true; + for (uint i=0; isize(); ++i) { + if (!first) std::cout << ", "; + first = false; + std::cout << colNames->getName(i); + } + std::cout << "}" << std::endl; + } + + // Send the content of the resulting Sparrow Buffer + int res = connect_->insertData( table, colNames.get(), spwBuffer.get() ); + if ( res != 0 ) { + std::cout << "Test " << params << " ... 
Failed: insert returned " << res << std::endl; + } else { + std::cout << "Test " << params << " ... OK" << std::endl; + } + } + catch ( const MyException& e ) + { + std::cout << "Test " << params << " ... Failed: exception " << e.getText() << ", " << errmsg() << std::endl; + } +} + +void TestColumnSubset::insertSelectColumns( const char* tableName, bool nullable ) +{ + try + { + dropTable(tableName); + + // Create table with only one column, timestamp + std::cout << "Creating data table " << tableName << " ..." << std::endl; + Sparrow::AutoPtr
tbl_1( createTable( tableName, nullable ) ); + if ( tbl_1->create( connect_ ) != 0 ) + throw MyException::create( false, "Failed to create Data Table %s.", tableName ); + std::cout << "Created data table " << tableName << std::endl; + + uint tests[][32] = {{0}, {1,0}, {1,1}, {2,0,1}, {3,0,1,1}, {2,0,2}, {2,0,3}, {2,0,4}, {2,0,5}, {2,0,6}, {2,0,7}, + {2,0,8}, {2,0,9}, {2,0,10}, {2,0,11}, {2,0,12}, {13,0,1,2,3,4,5,6,7,8,9,10,11,12}, {14,0,1,2,3,4,5,6,7,8,9,10,11,12,12}}; + uint nb_tests = sizeof(tests)/sizeof(tests[0]); + for ( uint i=0; icreateTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 48*3600*1000; + uint64_t coalescingPeriod = 24*3600*1000; + uint32_t aggregationPeriod = 300; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + + // Columns + uint32_t flags = (nullable ? COL_NULLABLE : 0); + table->appendColumn( "timestamp", 0, COL_TIMESTAMP, 3 ); + for ( uint i=1; i<=nbColumns; ++i ) { + std::ostringstream colName; + colName << "col_" << i; + table->appendColumn( colName.str().c_str(), i, COL_DOUBLE, 0, flags); + } + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + if ( (indxId=table->appendIndex( "index_2", 0, true )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + table->addColToIndex( indxId, 1 ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + +void TestColumnSubset::getColumnNamesM( ColumnNames* columns, Table* table, const testDefinitionM& params ) { + const Column& column = table->getColumn(0); // Timestamp column is always set + columns->appendName( column.getName() ); + for ( uint i=0; igetColumn( params.columns_[i] ); + columns->appendName( column.getName() ); + } +} + +void TestColumnSubset::testSelColumnsMassive( Table* table, testDefinitionM& params, uint nbRows ) +{ + // Insert no column + uint32_t buffSize = 1024*1024; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + Sparrow::AutoPtr colNames(connect_->createColumnNames(params.columns_.size())); + getColumnNamesM( colNames.get(), table, params ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + + for (uint i=0; iaddRow( row ); + if ( res < 0 ) { + if ( res == SPW_API_BUFFER_FULL ) { + int res = connect_->insertData( table, colNames.get(), spwBuffer.get() ); + if ( res != 0 ) { + std::cout << "Test " << params << " ... Failed: insert returned " << res << std::endl; + } else { + std::cout << "Test " << params << " ... OK" << std::endl; + } + spwBuffer = connect_->createBuffer( table, buffSize ); + --i; + } else { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + } + + + // Send the content of the resulting Sparrow Buffer + int res = connect_->insertData( table, colNames.get(), spwBuffer.get() ); + if ( res != 0 ) { + std::cout << "Test " << params << " ... 
Failed: insert returned " << res << std::endl; + } else { + std::cout << "Test " << params << " ... OK" << std::endl; + } + } + catch ( const MyException& e ) + { + std::cout << "Test " << params << " ... Failed: exception " << e.getText() << ", " << errmsg() << std::endl; + } +} + +void TestColumnSubset::insertSelectColumnsMassive( const char* tableName, bool nullable, uint nbCols ) +{ + const uint nbRows = 1000; + try + { + dropTable(tableName); + + // Create table with only one column, timestamp + Sparrow::AutoPtr
tbl_1( createTableMassive( tableName, nullable, nbCols ) ); + if ( tbl_1->create( connect_ ) != 0 ) + throw MyException::create( false, "Failed to create Data Table %s.", tableName ); + std::cout << "Created Data Table " << tableName << std::endl; + + uint tests[][32] = {{512, 1, 10, 50, 100, 250, UINT_MAX}, + {2048, 1, 100, 1000, UINT_MAX}}; + uint nb_tests = sizeof(tests)/sizeof(tests[0]); + + // Choose the right set of tests depending on the number of columns in the table + uint k = 0; + for ( ; k columns_; +public: + testDefinition(uint id, bool nulls, uint nbCols, uint* columns) : id_(id), nulls_(nulls) { + for ( uint i=0; i columns_; // Indexes of populated columns (first column has index 0) + +public: + testDefinitionM(uint id, bool nulls, uint nbCols, uint nbValues) : id_(id), nulls_(nulls), nbColumns_(nbCols), nbValues_(nbValues) { + uint step = nbColumns_/nbValues_; + for ( uint i=0, col=1; i +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "sql.h" + +using namespace Sparrow; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Test + +Test::Test(const SQLparams& sql_params) : sql_params_(sql_params) { + try + { + initialize(); + if ( !(connect_=createConnect()) ) + throw MyException::create( false, "Failed to get Connection object." ); + if ( connect_->setProperties( sql_params_.getHost(), sql_params_.getLogin(), sql_params_.getPsswd(), sql_params_.getMySQLPort(), sql_params_.getSpwPort() ) < 0 ) + throw MyException::create( false, "Failed to set Properties." ); + if ( connect_->connect() < 0 ) + throw MyException::create( false, "Failed to set Connect to Sparrow." ); + } catch ( const MyException& e ) { + printf( "Failed to connect to Sparrow: %s : %s\n", e.getText(), errmsg() ); + } +} + +Test::~Test() { + if ( connect_ ) { + if ( !connect_->isClosed() ) { + printf( "Disconnecting..." ); + connect_->disconnect(); + printf( "done\n" ); + } + printf( "Deleting connection object..." ); + delete connect_; + connect_= NULL; + printf( "done\n" ); + } + +} + +void Test::dropTable(const char* table_name) { + printf( "Dropping Sparrow table '%s'.'%s' ...", sql_params_.getSchema(), table_name); + MySQLGuard mysql(sql_params_.getLogin(), sql_params_.getPsswd(), sql_params_.getMySQLPort()); + char sql[1024]; + sprintf(sql, "drop table if exists %s.%s", sql_params_.getSchema(), table_name); + mysql.execute(sql); + printf( "done\n" ); +} + +void Test::reset(Table*& table) { + dropTable(table->getTableName()); + printf( "Deleting Sparrow table..." 
); + delete table; + table = NULL; + printf( "done\n" ); +} + +uint Test::getFlushInterval() { + MySQLGuard mysql(sql_params_.getLogin(), sql_params_.getPsswd(), sql_params_.getMySQLPort()); + mysql.execute("show variables like 'sparrow_flush_interval'"); + MYSQL_RES* result = mysql.get(); + MYSQL_ROW row = mysql_fetch_row(result); + if (row == 0) { + printf("Failed to get 'sparrow_flush_interval'\n."); + return 0; + } + const char* str = row[1]; + uint value = static_cast(atoi(str)); + return value; +} \ No newline at end of file diff --git a/storage/sparrow/api_test/common.h b/storage/sparrow/api_test/common.h new file mode 100644 index 000000000000..8244b9d6393e --- /dev/null +++ b/storage/sparrow/api_test/common.h @@ -0,0 +1,28 @@ +#ifndef _spw_test_common_h +#define _spw_test_common_h + + +//#include "../api/include/spw_global.h" +//#include "../api/include/global.h" +#include "../api/include/connection.h" +#include "../api/misc.h" +#include "exception.h" +#include "utils.h" + +class Test { + +protected: + SQLparams sql_params_; + Sparrow::Connection* connect_; + +public: + Test(const SQLparams& sql_params); + virtual ~Test(); + + void reset(Sparrow::Table*& table); + void dropTable(const char* table_name); + uint getFlushInterval(); + +}; + +#endif // _spw_test_common_h \ No newline at end of file diff --git a/storage/sparrow/api_test/errors.cpp b/storage/sparrow/api_test/errors.cpp new file mode 100644 index 000000000000..d489cd1501cf --- /dev/null +++ b/storage/sparrow/api_test/errors.cpp @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "errors.h" +#include "all_types.h" + + +class MyRowErr : public MyRow { +private: + mutable int skipCol_; // Column to skip during insertion + +public: + MyRowErr() : MyRow(), skipCol_(-1) {;} + MyRowErr(uint64_t timestamp, uint64_t slotId, uint8_t status, uint16_t samples, const char* name, const char* name_null, double value, + const uint8_t* blob, int blobLen, const uint8_t* blob_null, int blob_null_Len) : + MyRow(timestamp, slotId, status, samples, name, name_null, value, blob, blobLen, blob_null, blob_null_Len), skipCol_(-1) {;} + + int getSkippedCol() const { return skipCol_; } + void setSkippedCol(int col) { skipCol_ = col; } + + int decode(SparrowBuffer* buffer, void* dummy) const override; +}; + +inline int MyRowErr::decode( SparrowBuffer* buffer, void* /*dummy*/ ) const { + + const int nbCols = 13; + + int col = -1; + int res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addLong( col, timestamp_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addLong( col, slotId_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addByte( col, status_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addShort( col, samples_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addDouble( col, value_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addString( col, name_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addByte( col, status_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) ) { + if ( name_null_ == NULL ) { + buffer->addNull( col++ ); + } else { + if ( (res=buffer->addString( col, name_null_ )) != 0 ) return res; + } + } + if ( (++col != skipCol_%nbCols) && (res=buffer->addByte( col, status_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && (res=buffer->addBlob( col, blob_, blobLen_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) && 
(res=buffer->addByte( col, status_ )) != 0 ) return res; + if ( (++col != skipCol_%nbCols) ) { + if ( blob_null_ == NULL ) { + buffer->addNull( col++ ); + } else { + if ( (res=buffer->addBlob( col, blob_null_, blob_null_Len_ )) != 0 ) return res; + } + } + if ( (++col != skipCol_%nbCols) && (res=buffer->addByte( col, status_ )) != 0 ) return res; + + return 0; +} + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Create TEST Table + +void TestErrors::run() { + try + { + const char* table_name = "table_errors"; + + TestAlltypes allTypesTests(sql_params_); + + printf("Creating table %s...", table_name); + AutoPtr
table( allTypesTests.createTable(table_name) ); + printf("OK\n"); + + runInsertionTests( table.get() ); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestErrors::runInsertionTests( const Table* table ) { + + // Error cases + const uint32_t buffSize = 64*1024*512; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; igetNbColumns(); + const uint32_t nbRows = 1; + MyRowErr data[nbRows]; + data[0] = MyRowErr( now, 1, 0xCF, 0xFFFF, "abcdefghij", "abcdefghij", 1.1, (uint8_t*)blob, 10, (uint8_t*)blob, 10 ); + + for (uint j=0; jclear(); + + printf("Skipping value for column %u in insertion buffer for table %s...\r\n", j, table->getTableName()); + data[0].setSkippedCol(j); + + // Copy the data to the Sparrow Buffer, one row after another + for ( uint32_t i=0; iaddRow( data[i] ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + + } catch ( const MyException& e ) { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } + } + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + + diff --git a/storage/sparrow/api_test/errors.h b/storage/sparrow/api_test/errors.h new file mode 100644 index 000000000000..b80a168facf4 --- /dev/null +++ b/storage/sparrow/api_test/errors.h @@ -0,0 +1,21 @@ +#ifndef _spw_test_errors_h +#define _spw_test_errors_h + +#include "common.h" + +using namespace Sparrow; + + +//----------------------------------------------------------------------------- + +class TestErrors : public Test { +public: + TestErrors(const SQLparams& sql_params) : Test(sql_params) {;} + + void run(); + +private: + void runInsertionTests(const Table*); +}; + +#endif // _spw_test_errors_h \ No newline at end of file diff --git a/storage/sparrow/api_test/exception.cc b/storage/sparrow/api_test/exception.cc new file mode 100644 index 000000000000..5b541f8c8a38 --- /dev/null +++ b/storage/sparrow/api_test/exception.cc @@ -0,0 +1,106 @@ +#include "exception.h" + +#include +#include +#include + +#ifdef _WIN32 +#include "Windows.h" +#else +#include +#endif + + +///////////////////////////////////////////////////////////////////////////////////////////////////// +// MyException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +MyException spwerror; + +MyException::MyException(const char* text, int errcode /* = -1 */) + : errcode_(errcode) { + strncpy(buffer_, text, sizeof(buffer_) - 1); + buffer_[sizeof(buffer_) - 1] = '\0'; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + +// STATIC +MyException MyException::create( const bool addError, int errcode, const char* format, ... 
) { + char buffer[1024]; + va_list varargs; + va_start(varargs, format); + vsnprintf(buffer, sizeof(buffer), format, varargs); + va_end(varargs); + if (addError) { + char error[1024]; +#ifdef _WIN32 + LPSTR serror = error; + if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), serror, sizeof(error), 0) == 0) { + snprintf(error, sizeof(error), "error %d", GetLastError()); + } else { // Windows adds a nasty new line char... + size_t l = strlen(error) - 1; + while (error[l] == '\n' || error[l] == '\r') { + error[l--] = 0; + } + } +#else + snprintf(error, sizeof(error), "%s", strerror(errno)); +#endif + char result[1024]; + snprintf(result, sizeof(result), "%s (%s)", buffer, error); + return MyException(result, errcode); + } else { + return MyException(buffer, errcode); + } +} + +// STATIC +MyException MyException::create(const bool addError, const char* format, ...) { + char buffer[1024]; + va_list varargs; + va_start(varargs, format); + vsnprintf(buffer, sizeof(buffer), format, varargs); + va_end(varargs); + if (addError) { + char error[1024]; +#ifdef _WIN32 + LPSTR serror = error; + if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), serror, sizeof(error), 0) == 0) { + snprintf(error, sizeof(error), "error %d", GetLastError()); + } else { // Windows adds a nasty new line char... + size_t l = strlen(error) - 1; + while (error[l] == '\n' || error[l] == '\r') { + error[l--] = 0; + } + } +#else + snprintf(error, sizeof(error), "%s", strerror(errno)); +#endif + char result[1024]; + snprintf(result, sizeof(result), "%s (%s)", buffer, error); + return MyException(result); + } else { + return MyException(buffer); + } +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + + +const MyException& MyException::operator = ( const MyException& excpt ) { + errcode_ = excpt.errcode_; + memcpy( buffer_, excpt.buffer_, sizeof(buffer_) ); + return *this; +} + +void MyException::toLog() const { + fprintf(stderr, "Sparrow Test App: %s", getText()); +} diff --git a/storage/sparrow/api_test/exception.h b/storage/sparrow/api_test/exception.h new file mode 100644 index 000000000000..6a94a4bd4989 --- /dev/null +++ b/storage/sparrow/api_test/exception.h @@ -0,0 +1,56 @@ +#ifndef _my_exception_h_ +#define _my_exception_h_ + +#include "my_compiler.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef __GNUG__ + #if __GNUC__ >= 8 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#elif defined(_MSC_VER) + #if _MSC_VER >= 1800 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#else + #define _THROW_(a) throw(a) +#endif + + +class MyException { +private: + + int errcode_; + char buffer_[1024]; + +public: + + MyException() : errcode_(0) { + buffer_[0] = '\0'; + } + MyException(const char* text, int errcode = -1); + + MyException(const MyException&) = default; + + static MyException create(const bool addError, const char* format, ...) MY_ATTRIBUTE((format(printf, 2, 3))); + static MyException create(const bool addError, int errcode, const char* format, ...) 
MY_ATTRIBUTE((format(printf, 3, 4))); + + const MyException& operator = (const MyException&); + const char* getText() const { + return buffer_; + } + int getErrcode() const { return errcode_; } + void set_err_code(unsigned int errcode) { + errcode_ = errcode; + } + void toLog() const; +}; + +#endif /* #ifndef _my_exception_h_ */ diff --git a/storage/sparrow/api_test/many_partitions.cpp b/storage/sparrow/api_test/many_partitions.cpp new file mode 100644 index 000000000000..d462f071a011 --- /dev/null +++ b/storage/sparrow/api_test/many_partitions.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "all_types.h" +#include "many_partitions.h" + +//----------------------------------------------------------------------------- +void TestManyPartitions::run() { + try + { + const char* table_name = "table_many_partitions"; + + TestAlltypes allTypesTests(sql_params_); + + printf("Creating table %s...", table_name); + AutoPtr
table( allTypesTests.createTable( table_name ) ); + printf("OK\n"); + + createManyPartitions( table.get(), 5 ); + insertInterlacedTimestamps( table.get() ); + insertInterlacedTimestampsMassive( table.get() ); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestManyPartitions::createManyPartitions( const Table* table, uint nbPartitions ) +{ + uint32_t buffSize = 64*1024*512; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; igetCoalescPeriod(); + + for ( uint i=0; iaddRow( row ); + if ( res < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + MyRow row2( now+i-coalescing_period, i+1, 0xCF, 0xFFFF, "abcdefghij", "abcdefghij", 1.1, (uint8_t*)blob, 10, (uint8_t*)blob, 10 ); + res = spwBuffer->addRow( row2 ); + if ( res < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestManyPartitions::insertInterlacedTimestamps( const Table* table ) +{ + uint32_t buffSize = 64*1024*1024; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; igetCoalescPeriod(); + uint64_t t0 = now - now%coalescing_period; + const int increment[] = {2, -2, 0, -1, 3, -3, 1, -1}; + const uint nb_incr = sizeof(increment)/sizeof(increment[0]); + for ( uint i=0; iaddRow( row ); + if ( res < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestManyPartitions::insertInterlacedTimestampsMassive( const Table* table ) +{ + const uint nbrows = 100; + uint32_t buffSize = 64*1024*1024; + try + { + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + const int blobLen = 11; // columns length + 1 + uint8_t blob[blobLen]; + for ( int i=0; igetCoalescPeriod(); + uint64_t t0 = now - now%coalescing_period; + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + for ( uint i=0; iaddRow( row ); + if ( res < 0 ) { + if ( res == SPW_API_BUFFER_FULL ) { + connect_->insertData( table, spwBuffer.get() ); + spwBuffer = connect_->createBuffer( table, buffSize ); + } else { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + } + catch ( const MyException& e ) + { + printf( "Exception! 
%s : %s\n", e.getText(), errmsg() ); + } +} + + diff --git a/storage/sparrow/api_test/many_partitions.h b/storage/sparrow/api_test/many_partitions.h new file mode 100644 index 000000000000..f4d32142fc3c --- /dev/null +++ b/storage/sparrow/api_test/many_partitions.h @@ -0,0 +1,22 @@ +#ifndef _spw_test_many_partitions_h +#define _spw_test_many_partitions_h + +#include "common.h" + +using namespace Sparrow; + +//----------------------------------------------------------------------------- + +class TestManyPartitions : public Test { +public: + TestManyPartitions(const SQLparams& sql_params) : Test(sql_params) {;} + + void run(); + +private: + void createManyPartitions(const Table* table, uint nbPartitions); + void insertInterlacedTimestamps(const Table* table); + void insertInterlacedTimestampsMassive(const Table* table); +}; + +#endif // _spw_test_many_partitions_h \ No newline at end of file diff --git a/storage/sparrow/api_test/sparrow_api_test.cpp b/storage/sparrow/api_test/sparrow_api_test.cpp new file mode 100644 index 000000000000..1606419a3130 --- /dev/null +++ b/storage/sparrow/api_test/sparrow_api_test.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "../api/include/connection.h" +#include "../api/misc.h" +#include "exception.h" + +#include "all_types.h" +#include "column_optim.h" +#include "column_subset.h" +#include "errors.h" +#include "many_partitions.h" +#include "too_many_columns.h" +#include "vl.h" + +//#define TEST_BASIC +//#define TEST_CREATETABLE_TOO_MANY_COLS +//#define TEST_COALESCING +//#define TEST_CREATETABLE +//#define TEST_VL +//#define TEST_MASTERFILE +//#define TEST_INSERT +//#define TEST_INSERT_2 +//#define TEST_MANY_PARTITIONS +//#define TEST_INSERT_INTERLACED_TS +//#define TEST_INSERT_INTERLACED_TS_MASS +//#define TEST_INSERT_SELECT_COLUMNS +//#define TEST_INSERT_SELECT_COLUMNS_MASSIVE +#define TEST_COLUMN_OPTIM +//#define TEST_ERRORS +//#define TEST_DISABLE_COALESCING + +// Connection properties +#define EXAMPLE_DB "test_1" +#define EXAMPLE_URL "tcp://127.0.0.1:38000" +#define EXAMPLE_HOST "127.0.0.1" +#define EXAMPLE_MYSQL_PORT 38004 +#define EXAMPLE_SPARROW_PORT 38005 +#define EXAMPLE_USER "root" +#define EXAMPLE_PASS "infovista" + +const char* schema = "test_spw"; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MAIN +////////////////////////////////////////////////////////////////////////////////////////////////////// + +int main (int argc, char* argv[]) +{ + const char* host = (argc >= 2 ? argv[1] : EXAMPLE_HOST); + const char* user = (argc >= 3 ? argv[2] : EXAMPLE_USER); + const char* pass = (argc >= 4 ? argv[3] : EXAMPLE_PASS); + //const char* database = (argc >= 5 ? 
argv[4] : EXAMPLE_DB); + uint32_t mysqlPort = EXAMPLE_MYSQL_PORT; + uint32_t sparrowPort = EXAMPLE_SPARROW_PORT; + if ( argc >= 5 ) { + mysqlPort = atoi(argv[4]); + } + if ( argc >= 6 ) { + sparrowPort = atoi(argv[5]); + } + + SQLparams sql_params(host, user, pass, mysqlPort, sparrowPort, schema); + +#ifdef TEST_BASIC + TestAlltypes test_all(sql_params); + test_all.run(); +#endif + +#ifdef TEST_DISABLE_COALESCING + TestAlltypes test_all(sql_params); + test_all.runDisableCoalescingSchemaTest("dummy_table", true); +#endif + +#ifdef TEST_ERRORS + TestErrors test_errors(sql_params); + test_errors.run(); +#endif + +#ifdef TEST_COLUMN_OPTIM + TestColumnOptim test_col_optim(sql_params); + //test_col_optim.runSimple(); + test_col_optim.runtAlterTests(); // Not implemented yet in Sparrow. To test later + //test_col_optim.runMultiPartTests(); + //test_col_optim.runCoalescingTests(); + //test_col_optim.runDNSTests(); + //test_col_optim.runVolumeTests(); +#endif + +#ifdef TEST_CREATETABLE_TOO_MANY_COLS +#endif + + +#ifdef TEST_INSERT_SELECT_COLUMNS + TestColumnSubset test_ins_sel_cols(sql_params); + test_ins_sel_cols.run(); +#endif + +#ifdef TEST_VL + TestVL testVL(sql_params); + testVL.testColaescing(); +#endif + + +#ifdef TEST_MANY_PARTITIONS +#endif + + getchar(); + + return 0; +} + + diff --git a/storage/sparrow/api_test/sql.cpp b/storage/sparrow/api_test/sql.cpp new file mode 100644 index 000000000000..336e24f83ce3 --- /dev/null +++ b/storage/sparrow/api_test/sql.cpp @@ -0,0 +1,72 @@ +#define MYSQL_SERVER 1 +#include "my_sys.h" +#include "my_dbug.h" +//#include // For mysqld options. +#include "sql/query_options.h" // For mysqld options. +#include "errmsg.h" + +#include "sql.h" // For configuration parameters. +#include "m_string.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MySQLGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +MySQLGuard::MySQLGuard(const char* username, const char* password, const uint port) _THROW_(MyException) + : result_(0) { + mysql_ = mysql_init(0); + uint protocol = MYSQL_PROTOCOL_TCP; + mysql_options(mysql_, MYSQL_OPT_PROTOCOL, &protocol); + + // Use big timeouts because some operations may be long (e.g. drop Sparrow table + // may require deleting a lot of files.) + uint big = 86400; + mysql_options(mysql_, MYSQL_OPT_READ_TIMEOUT, &big); + mysql_options(mysql_, MYSQL_OPT_WRITE_TIMEOUT, &big); + if (mysql_real_connect(mysql_, 0, username, password, 0, port, 0, 0) == 0) { + MySQLGuard::check(mysql_, 0); + } +} + +void MySQLGuard::execute(const char* stmt) _THROW_(MyException) { + DBUG_PRINT("sparrow_api", ("Statement: %s", stmt)); + if (mysql_real_query(mysql_, stmt, (uint)strlen(stmt)) != 0) { + MySQLGuard::check(mysql_, stmt); + } + clear(); + result_ = mysql_store_result(mysql_); +} + +MySQLGuard::~MySQLGuard() { + clear(); + mysql_close(mysql_); +} + +// STATIC +void MySQLGuard::check(MYSQL* mysql, const char* stmt) _THROW_(MyException) { + const char* sqlError = mysql_error(mysql); + unsigned int err_code = mysql_errno(mysql); + const char* msg = "unknown error"; + if (sqlError == 0 || strlen(sqlError) == 0) { + uint e = mysql->net.last_errno; + if (e != 0) { + //msg = ER(e); + } + } else { + msg = sqlError; + } + + if (stmt == 0) { + MyException e = MyException::create(false, "Cannot connect: %u, %s", err_code, msg); + e.set_err_code( err_code ); + throw e; + } else { + // Truncate statement to 255 chars if necessary. 
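+      // (MyException formats its message into fixed 1 KB buffers, hence the truncation.)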
+ char tstmt[256]; + strncpy(tstmt, stmt, sizeof(tstmt)); + tstmt[sizeof(tstmt) - 1] = 0; + MyException e = MyException::create(false, "Cannot execute \"%s\": %u, %s", tstmt, err_code, msg); + e.set_err_code( err_code ); + throw e; + } +} diff --git a/storage/sparrow/api_test/sql.h b/storage/sparrow/api_test/sql.h new file mode 100644 index 000000000000..13af01fd5ff6 --- /dev/null +++ b/storage/sparrow/api_test/sql.h @@ -0,0 +1,41 @@ +#ifndef _test_sql_h_ +#define _test_sql_h_ + +//#include "my_global.h" +#include "mysql.h" +#include "common.h" +#include "exception.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MySQLGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MySQLGuard { +private: + + MYSQL* mysql_; + MYSQL_RES* result_; + +public: + + MySQLGuard(const char* username, const char* password, const uint port) _THROW_(MyException); + + void execute(const char* stmt) _THROW_(MyException); + + MYSQL_RES* get() { + return result_; + } + + ~MySQLGuard(); + + void clear() { + if (result_ != 0) { + mysql_free_result(result_); + result_ = 0; + } + } + + static void check(MYSQL* mysql, const char* stmt) _THROW_(MyException); +}; + +#endif // _test_sql_h_ diff --git a/storage/sparrow/api_test/too_many_columns.cpp b/storage/sparrow/api_test/too_many_columns.cpp new file mode 100644 index 000000000000..41c51074f506 --- /dev/null +++ b/storage/sparrow/api_test/too_many_columns.cpp @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "too_many_columns.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow2 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow2 : public SparrowRow +{ +private: + uint nbCols_; + uint iter_; + uint64_t timestamp_; + +public: + MyRow2(uint64_t timestamp, uint nbCols, uint i) : nbCols_(nbCols), iter_(i), timestamp_(timestamp) + {;} + + int decode(SparrowBuffer* buffer, void* /*dummy*/) const override; +}; + +int MyRow2::decode( SparrowBuffer* buffer, void* /*dummy*/ ) const { + int col = 0; + int res; + if ( (res=buffer->addLong( col++, timestamp_ )) != 0 ) return res; + for ( uint i=0; iaddDouble( col++, value )) != 0 ) return res; + } + return 0; +} + +//-----------------------------------------------------------------------------// + +void TestTooManyColumns::run() { + try + { + const char* table_name = "table_too_many_cols"; + + printf("Creating table %s...", table_name); + uint nbCols = 0; + AutoPtr
table( createTableTooLong( "table_2", nbCols ) ); + printf("with %u columns, OK\n", nbCols); + + sendDataFlow( table.get(), nbCols ); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + + +Table* TestTooManyColumns::createTableTooLong( const char* table_name, uint& nbCols ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 24*3600*1000; + uint64_t coalescingPeriod = 3600*1000; + uint32_t aggregationPeriod = 300; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + + nbCols = 0xFFFF/8; + + // Columns + /*Columns columns; + columns.resize( nbCols+1 ); + int col = 0; + columns.appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + for ( uint i=0; isetColumns( columns );*/ + + int col = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + for ( uint i=0; iappendColumn( col_name, col++, COL_DOUBLE ); + } + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + +Table* TestTooManyColumns::createTableManyCol( const char* table_name, uint nbCol, bool nullable ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 48*3600*1000; + uint64_t coalescingPeriod = 24*3600*1000; + uint32_t aggregationPeriod = 300; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + + // Columns + uint32_t flags = (nullable ? COL_NULLABLE : 0); + int col = 0; + table->appendColumn( "timestamp", col++, COL_TIMESTAMP, 3 ); + for ( uint k=0; kappendColumn( "double", col++, COL_DOUBLE, 0, flags); + } + + // Indexes + int indxId; + if ( (indxId=table->appendIndex( "index_1", 0, false )) < 0 ) + throw MyException::create( false, "Failed to create index" ); + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + + +void TestTooManyColumns::sendDataFlow( const Table* table, uint nbCols ) +{ + const uint32_t buffSize = 64*1024*512; + const uint32_t nbRows = 10000; + try + { + printf("Sending data flow to table %s...", table->getTableName()); + AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." 
); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + + for ( uint i=0; iaddRow( row ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + printf("OK\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! %s : %s\n", e.getText(), errmsg() ); + } +} + + diff --git a/storage/sparrow/api_test/too_many_columns.h b/storage/sparrow/api_test/too_many_columns.h new file mode 100644 index 000000000000..a27d01106b04 --- /dev/null +++ b/storage/sparrow/api_test/too_many_columns.h @@ -0,0 +1,23 @@ +#ifndef _spw_test_too_many_columns_h +#define _spw_test_too_many_columns_h + +#include "common.h" + +using namespace Sparrow; + +//----------------------------------------------------------------------------- +// Create TEST Table with too many columns + +class TestTooManyColumns : public Test { +public: + TestTooManyColumns(const SQLparams& sql_params) : Test(sql_params) {;} + + void run(); + +private: + Table* createTableTooLong(const char* table_name, uint& nbCols); + Table* createTableManyCol(const char* table_name, uint nbCol, bool nullable); + void sendDataFlow(const Table* table, uint nbCols); +}; + +#endif // _spw_test_too_many_columns_h \ No newline at end of file diff --git a/storage/sparrow/api_test/utils.h b/storage/sparrow/api_test/utils.h new file mode 100644 index 000000000000..abfbc78cd9db --- /dev/null +++ b/storage/sparrow/api_test/utils.h @@ -0,0 +1,58 @@ +#ifndef _spw_test_utils_h +#define _spw_test_utils_h + +#ifdef _WIN32 +#include "my_inttypes.h" +#else +#include "my_compiler.h" +#endif + +class SQLparams { +private: + char host_[64]; + char login_[32]; + char psswd_[32]; + uint mysql_port_; + uint spw_port_; + char schema_[64]; + +public: + SQLparams(const char* host, const char* login, const char* psswd, const uint mysql_port, const uint spw_port, const char* schema) + : mysql_port_(mysql_port), spw_port_(spw_port) + { + strncpy(host_, host, sizeof(host_) - 1); + host_[sizeof(host_) - 1] = '\0'; + strncpy(login_, login, sizeof(login_) - 1); + login_[sizeof(login_) - 1] = '\0'; + strncpy(psswd_, psswd, sizeof(psswd_) - 1); + psswd_[sizeof(psswd_) - 1] = '\0'; + strncpy(schema_, schema, sizeof(schema_) - 1); + schema_[sizeof(schema_) - 1] = '\0'; + } + + const char* getHost() const { + return host_; + } + + const char* getLogin() const { + return login_; + } + + const char* getPsswd() const { + return psswd_; + } + + uint getMySQLPort() const { + return mysql_port_; + } + + uint getSpwPort() const { + return spw_port_; + } + + const char* getSchema() const { + return schema_; + } +}; + +#endif // _spw_test_utils_h \ No newline at end of file diff --git a/storage/sparrow/api_test/vl.cpp b/storage/sparrow/api_test/vl.cpp new file mode 100644 index 000000000000..f4597f7c5868 --- /dev/null +++ b/storage/sparrow/api_test/vl.cpp @@ -0,0 +1,219 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "vl.h" + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MyRow3 +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MyRow3 : public SparrowRow +{ +private: + int idCount_; + int 
dataCount_; + int instance_; + uint64_t timestamp_; + +public: + MyRow3(int idCount, int dataCount, uint64_t timestamp, int instance) : idCount_(idCount), dataCount_(dataCount), + instance_(instance), timestamp_(timestamp) + {;} + + int decode(SparrowBuffer* buffer, void* /*dummy*/) const override; +}; + +int MyRow3::decode( SparrowBuffer* buffer, void* /*dummy*/ ) const { + int col = 0; + int res; + if ( (res=buffer->addLong( col++, timestamp_ )) != 0 ) return res; + for ( int i=0; iaddLong( col++, value )) != 0 ) return res; + } + for ( int i=0; iaddDouble( col++, value )) != 0 ) return res; + } + return 0; +} + +//----------------------------------------------------------------------------- + +void TestVL::run() { + try + { + const char* table_name = "table_vl"; + + AutoPtr
table(createTableAndSend( table_name, 4, 3, 2500 )); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +void TestVL::testColaescing() { + try + { + const char* table_name = "table_vl"; + const int idCount = 3, dataCount = 100; + uint nbRows = 1024*1024/(8*100); + AutoPtr
table(createTableAndSend( table_name, nbRows, idCount, dataCount )); + getchar(); + + sendData( table.get(), nbRows, idCount, dataCount ); + getchar(); + + connect_->disableCoalescing(30, sql_params_.getSchema(), true); + + } catch ( const MyException& e ) { + printf( "Test failed: %s : %s\n", e.getText(), errmsg() ); + } +} + +Table* TestVL::createTable( const char* table_name, int idCount, int dataCount ) +{ + Table* table = connect_->createTable(); + if ( !table ) + throw MyException::create( false, "Failed to create Sparrow table object for '%s'.'%s'", sql_params_.getSchema(), table_name ); + + int res; + uint64_t maxLifetime = 24*3600*1000; + uint64_t coalescingPeriod = 3600*1000; + uint32_t aggregationPeriod = 300; + + // Global parameters + table->setDatabaseName( sql_params_.getSchema() ); + table->setTableName( table_name ); + table->setMaxLifetime( maxLifetime ); + table->setCoalescPeriod( coalescingPeriod ); + table->setAggregPeriod( aggregationPeriod ); + + // Columns + int col = 0; + table->appendColumn( "time", col++, COL_TIMESTAMP, 3 ); + for ( int i=0; iappendColumn( colName, col++, COL_LONG, 0 ); + } + + for ( int i=0; iappendColumn( colName, col++, COL_DOUBLE, 0 ); + } + + // Indexes + for ( int i=0; iappendIndex( indxName, i, false ); + if ( indxId < 0 ) + throw MyException::create( false, "Failed to create index" ); + } + + // Foreign keys + for ( int i=0; iappendFK( fkName, i+1, sql_params_.getSchema(), tableName, colName ); + } + + if ( (res=table->create( connect_ )) != 0 ) + throw MyException::create( false, "Failed to create table '%s'.'%s', error code %u", sql_params_.getSchema(), table_name, res ); + + return table; +} + + +void TestVL::sendData( const Table* table, int nbRows, int idCount, int dataCount ) +{ + printf("Sending %u rows to table %s...", nbRows, table->getTableName()); + const uint buffSize = 64*1024*512; + const int nbInstances = 2; + try + { + Sparrow::AutoPtr spwBuffer( connect_->createBuffer( table, buffSize ) ); + if ( spwBuffer.get() == NULL ) + throw MyException::create( false, "Failed to create SparrowBuffer object." ); + + time_t ltime; + time( <ime ); + uint64_t now = ltime; + now *= 1000; + + for ( int i=0; iaddRow( row ); + if ( res == SPW_API_BUFFER_FULL ) { + printf("%u%%, ", (uint)(i*100.0/nbRows)); + connect_->insertData( table, spwBuffer.get() ); + spwBuffer->clear(); + } else if ( res < 0 ) { + throw MyException::create( false, "failed to add row to Sparrow buffer %d", res ); + } + if ( spwBuffer->addRow( row ) < 0 ) + throw MyException::create( false, "failed to add row to Sparrow buffer" ); + } + } + + // Send the content of the resulting Sparrow Buffer + connect_->insertData( table, spwBuffer.get() ); + + printf("OK\n"); + } + catch ( const MyException& e ) + { + printf( "Exception! 
%s : %s\n", e.getText(), errmsg() ); + } +} + +Table* TestVL::createTableAndSend( const char* table_name, int nbRows, int idCount, int dataCount ) +{ + static int lastIdCount = 0; + static int lastDataCount = 0; + + Table* table = NULL; + + try { + printf("Creating table %s, %u id, %u data columns...", table_name, idCount, dataCount); + Master* master = connect_->getMasterFile( sql_params_.getSchema(), table_name ); + if ( master != NULL && idCount == lastIdCount && dataCount == lastDataCount ) { + table = connect_->getTable( sql_params_.getSchema(), table_name ); + if ( table->getNbColumns() != (uint32_t)(1 + idCount + dataCount) ) { + table = NULL; + } + delete master; master = NULL; + } + if ( table == NULL ) { + lastIdCount = idCount; + lastDataCount = dataCount; + table = createTable( table_name, idCount, dataCount ); + } + printf("OK\n"); + + sendData( table, nbRows, idCount, dataCount ); + + } catch ( const MyException& e ) { + printf( "MyException %u, %s", e.getErrcode(), e.getText() ); + } + return table; +} diff --git a/storage/sparrow/api_test/vl.h b/storage/sparrow/api_test/vl.h new file mode 100644 index 000000000000..1152e8df256a --- /dev/null +++ b/storage/sparrow/api_test/vl.h @@ -0,0 +1,22 @@ +#ifndef _spw_test_vl_h +#define _spw_test_vl_h + +#include "common.h" + +using namespace Sparrow; + +//----------------------------------------------------------------------------- +class TestVL : public Test { +public: + TestVL(const SQLparams& sql_params) : Test(sql_params) {;} + + void run(); + void testColaescing(); + +private: + Table* createTableAndSend(const char* table_name, int nbRows, int idCount, int dataCount); + Table* createTable(const char* table_name, int idCount, int dataCount); + void sendData(const Table* table, int nbRows, int idCount, int dataCount); +}; + +#endif // _spw_test_vl_h \ No newline at end of file diff --git a/storage/sparrow/dns/dns.cc b/storage/sparrow/dns/dns.cc new file mode 100644 index 000000000000..7a14d913140e --- /dev/null +++ b/storage/sparrow/dns/dns.cc @@ -0,0 +1,273 @@ +/* + DNS listener and worker threads. +*/ + +#include "../handler/plugin.h" // For configuration parameters. +#include "dns.h" +#include "dnsnet.h" +#include "dnsconfiguration.h" + + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const DnsWorkerFactory DnsWorkerFactory::factory_("DnsWorker"); + +ThreadPool* DnsWorker::threadPool_ = 0; + +// STATIC +void DnsWorker::initialize() _THROW_(SparrowException) { + threadPool_ = new ThreadPool(DnsWorkerFactory::factory_, &sparrow_max_dns_worker_threads, + &SparrowStatus::get().dnsWorkerThreads_, "DnsWorker::Queue", true); +} + +// STATIC +void DnsWorker::shutdown() { + threadPool_->stop(); + delete threadPool_; + threadPool_ = 0; +} + +// STATIC +void DnsWorker::sendBuffers(DnsBuffers& buffers) { + threadPool_->send(buffers); +} + +// Worker's processing method: it handles incoming DNS responses. +bool DnsWorker::process(SYSpSlist* buffers) +{ + const uint64_t now = std::time(nullptr); + const uint64_t mnow = my_micro_time(); + + // Received a response: add entry to cache and terminate related pending requests. 
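+  // For each buffer, the 16-bit request id is read from the packet header and looked up in
+  // the outstanding-request table while holding the configuration lock; responses with no
+  // matching pending entry are counted as discarded, otherwise the configuration decodes
+  // them into the cache entry. Processed buffers are then returned to the shared pool.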
+ DnsBuffers processedBuffers; + { + while (!buffers->isEmpty()) { + DnsBuffer& buffer = *buffers->removeAt(0); + uint32_t requestId = buffer.getId(); + DnsConfiguration& configuration = buffer.getConfiguration(); + { + Guard guard(configuration.getLock()); + DnsCacheEntry* entry = Dns::findEntry(requestId, &configuration); + if (entry == 0) { + Atomic::inc64(&SparrowStatus::get().dnsDiscardedResponses2_); + } else { + configuration.decodeResponse(now, mnow, buffer, entry); + } + } + buffer.setConfiguration(0); + processedBuffers.append(&buffer); + } + } + + // Put back buffers. + Dns::releaseBuffers(processedBuffers); + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsRequests +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This is an array of outstanding DNS requests. This way, we can easily find the request when a +// response arrives, just by extracting the reponse identifier from the packet. +// Each request is made of a pending DNS cache entry and a DNS worker thread that will take care +// of processing the response when it arrives. + +DnsRequests::DnsRequests() : lock_(false, "DnsRequests::lock_"), n_(0), free_(0) { +} + +void DnsRequests::clear(uint32_t i) { + Guard guard(lock_); + assert(i < size()); + if (requests_[i].isSet()) { + requests_[i].reset(); + if (i < free_) { + free_ = i; + } + assert(n_ > 0); + n_--; + } +} + +DnsConfiguration* DnsRequests::getConfiguration(uint32_t i) { + Guard guard(lock_); + assert(i < size()); + return requests_[i].getConfiguration(); +} + +DnsCacheEntry* DnsRequests::checkEntry(uint32_t i, DnsConfiguration* configuration) { + Guard guard(lock_); + assert(i < size()); + DnsCacheEntry* entry = requests_[i].getEntry(); + if (entry != 0 && entry->getRequestId() == i && requests_[i].getConfiguration() == configuration) { + return entry; + } else { + return 0; + } +} + +void DnsRequests::set(DnsConfiguration* configuration, DnsCacheEntry* entry) { + Guard guard(lock_); + if (n_ == size()) { + entry->delayed(true); + } else { + entry->delayed(false); + n_++; + assert(free_ < size()); + requests_[free_] = DnsRequest(configuration, entry); + entry->setRequestId(free_); + uint32_t i; + for (i = free_ + 1; i < size(); ++i) { + if (!requests_[i].isSet()) { + break; + } + } + assert(i <= size()); + free_ = i; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Dns +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This is the DNS listener thread. + +Dns* Dns::dns_ = 0; + +// Creates and starts the DNS listener. 
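+// The listener is a process-wide singleton (dns_) created by initialize(), which throws if
+// the thread cannot be started; shutdown() (see dns.h) stops the thread and deletes it.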
+// STATIC +void Dns::initialize() _THROW_(SparrowException) { + dns_ = new Dns(); + if (!dns_->start()) { + throw SparrowException::create(false, "Cannot start DNS thread"); + } +} + +Dns::Dns() : Thread("Dns::dns_"), lock_(false, "Dns::lock_"), cond_(false, lock_, "Dns::cond_"), + serial_(0), last_(UINT_MAX), maxSocketId_(INVALID_SOCKET), bufferLock_(false, "Dns::bufferLock_") { + FD_ZERO(&fdSet_); + from_ = SocketUtil::getAddress(0, 0); +#ifdef _WIN32 + msgBuffers_[0].buf = 0; + msgBuffers_[0].len = 0; + msgBuffers_[1].buf = staticBuffer_; + msgBuffers_[1].len = sizeof(staticBuffer_); +#else + memset(&msgheader_, 0, sizeof (msgheader_)); + msgheader_.msg_name = reinterpret_cast(from_.getSockAddr()); + msgheader_.msg_namelen = from_.getSockAddrLength(); + msgBuffers_[0].iov_base = 0; + msgBuffers_[0].iov_len = 0; + msgBuffers_[1].iov_base = static_cast(staticBuffer_); + msgBuffers_[1].iov_len = static_cast(sizeof(staticBuffer_)); + msgheader_.msg_iov = reinterpret_cast(&msgBuffers_); + msgheader_.msg_iovlen = 2; +#endif +} + +// DNS listener's processing method. +bool Dns::process() { + // Check if configuration changed. + { + Guard guard(lock_); + if (last_ != serial_) { + // Build fd_set. + maxSocketId_ = DnsSocket::fillFdSet(&fdSet_, socketIds_); + last_ = serial_; + } + } + if (socketIds_.isEmpty()) { + // Nothing to do; just sleep. + cond_.wait(1000, false); + } else { + DnsBuffers emptyBuffers; + DnsBuffers sentBuffers; + + // Wait for a network event or timeout. + fd_set fdSet = fdSet_; + struct timeval tv; + tv.tv_sec = sparrow_dns_timeout / 1000; + tv.tv_usec = (sparrow_dns_timeout * 1000) % 1000000; + int rc = select(static_cast(maxSocketId_), &fdSet, 0, 0, &tv); + if (rc > 0) { + // Prepare buffers: one buffer per selected socket. + { + Guard bufferGuard(bufferLock_); + for (int i = 0; i < rc; ++i) { + emptyBuffers.append(buffers_.isEmpty() ? new DnsBuffer(256) : buffers_.removeAt(0)); + } + } + + // Read incoming packets. + for (uint32_t i = 0; i < socketIds_.length(); ++i) { + my_socket socketId = socketIds_[i]; + if (FD_ISSET(socketId, &fdSet)) { + DnsBuffer& buffer = *emptyBuffers.removeAt(0); +#ifdef _WIN32 + msgBuffers_[0].buf = reinterpret_cast(buffer.getData()); + msgBuffers_[0].len = buffer.getCapacity(); + unsigned long packetLength = 0; + DWORD flags = 0; + int fromLength = from_.getSockAddrLength(); + if (WSARecvFrom(socketId, (LPWSABUF)&msgBuffers_, 2, &packetLength, &flags, + from_.getSockAddr(), &fromLength, 0, 0) == SOCKET_ERROR) { + packetLength = 0; + } +#else + msgBuffers_[0].iov_base = reinterpret_cast(buffer.getData()); + msgBuffers_[0].iov_len = static_cast(buffer.getCapacity()); + ssize_t packetLength = recvmsg(socketId, &msgheader_, 0); +#endif + SparrowStatus::get().dnsResponses_++; + + // We are only interested in packets likely to contain a DNS answer. + bool putBack = false; + if (packetLength > 12) { // DNS header is 12 byte long. + uint32_t capacity = buffer.getCapacity(); + buffer.setLength(packetLength); + if (packetLength > capacity) { + memcpy(buffer.getData() + capacity, staticBuffer_, packetLength - capacity); + } + + // Find request id. + DnsConfiguration* configuration = requests_.getConfiguration(buffer.getId()); + if (configuration == 0) { + putBack = true; + } else { + buffer.setConfiguration(configuration); + sentBuffers.append(&buffer); + } + } else { + putBack = true; + } + if (putBack) { + // Put back buffer. 
+ emptyBuffers.append(&buffer); + Atomic::inc64(&SparrowStatus::get().dnsDiscardedResponses1_); + } + } + } + + // Send all received buffers to worker threads. + DnsWorker::sendBuffers(sentBuffers); + } + + if (!emptyBuffers.isEmpty()) { + // Put back buffers. + releaseBuffers(emptyBuffers); + } + } + return true; +} + +bool Dns::notifyStop() { + cond_.signal(); + return SocketUtil::notifyStopSocket(); +} + +} diff --git a/storage/sparrow/dns/dns.h b/storage/sparrow/dns/dns.h new file mode 100644 index 000000000000..a4a9b784a515 --- /dev/null +++ b/storage/sparrow/dns/dns.h @@ -0,0 +1,276 @@ +/* + DNS listener and worker threads. +*/ + +#ifndef _dns_dns_h_ +#define _dns_dns_h_ + +#include "dnsconfiguration.h" +#include "dnscache.h" + +extern uint sparrow_idle_thread_timeout; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsRequest +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsRequest { +private: + + DnsConfigurationGuard configuration_; + DnsCacheEntry* entry_; + +public: + + DnsRequest() : configuration_(0), entry_(0) { + } + + DnsRequest(DnsConfiguration* configuration, DnsCacheEntry* entry) + : configuration_(configuration), entry_(entry) { + } + + DnsConfiguration* getConfiguration() { + return configuration_.get(); + } + + DnsCacheEntry* getEntry() { + return entry_; + } + + bool isSet() const { + return entry_ != 0; + } + + void reset() { + configuration_ = 0; + entry_ = 0; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsRequests +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Outstanding requests. 
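+// A fixed table of 65536 slots, one per possible 16-bit DNS transaction id, guarded by lock_.
+// set() hands the next free slot to a pending cache entry (or marks the entry delayed when the
+// table is full), checkEntry() validates an incoming response id against its configuration,
+// and clear() releases the slot once the entry is resolved or abandoned.
+//
+// Typical round trip (sketch):
+//   Dns::setRequest(cfg, entry);                 // allocates a slot, sets the entry request id
+//   ...response with that id arrives...
+//   DnsCacheEntry* e = Dns::findEntry(id, cfg);  // requests_.checkEntry(id, cfg)
+//   Dns::clearRequest(e);                        // done from DnsCache::putBack()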
+class DnsRequests { +private: + + DnsRequest requests_[65536]; + Lock lock_; + uint32_t n_; + uint32_t free_; + +private: + + uint32_t size() { + return sizeof(requests_) / sizeof(requests_[0]); + } + +public: + + DnsRequests(); + + void clear(uint32_t i); + + DnsConfiguration* getConfiguration(uint32_t i); + + DnsCacheEntry* checkEntry(uint32_t i, DnsConfiguration* configuration); + + void set(DnsConfiguration* configuration, DnsCacheEntry* entry); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsBuffer : public SYSidlink { +private: + + DnsConfigurationGuard configuration_; + SYSvector data_; + +public: + + DnsBuffer(uint32_t size) : data_(size) { + data_.forceLength(size); + } + + uint8_t* getData() { + return const_cast(data_.data()); + } + + const uint8_t* getData() const { + return data_.data(); + } + + uint32_t getLength() const { + return data_.length(); + } + + uint32_t getCapacity() const { + return data_.capacity(); + } + + void setLength(uint32_t length) { + if (length > data_.capacity()) { + data_.resize(length); + } + data_.forceLength(length); + } + + DnsConfiguration& getConfiguration() { + return *configuration_.get(); + } + + void setConfiguration(DnsConfiguration* configuration) { + configuration_ = configuration; + } + + uint32_t getId() const { + const uint8_t* b = getData(); + return (b[0] << 8) | b[1]; + } +}; + +typedef SYSpSlist DnsBuffers; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsWorker : public MessageThread { +private: + + static ThreadPool* threadPool_; + +protected: + + bool process(SYSpSlist* buffers) override; + +public: + + DnsWorker(const char* name, Queue& queue) : MessageThread(name, queue, &sparrow_idle_thread_timeout) { + } + + ~DnsWorker() { + } + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendBuffers(DnsBuffers& buffer); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsWorkerFactory +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsWorkerFactory : public MessageThreadFactory, private ThreadNameGenerator { +public: + + static const DnsWorkerFactory factory_; + +public: + + DnsWorkerFactory(const char* prefix) : ThreadNameGenerator(prefix) { + } + + MessageThread* createThread(Queue& queue) const override { + return new DnsWorker(getName().c_str(), queue); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Dns +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define MAX_DNS_BUFFERS 100 + +class Dns : public Thread { +private: + + Lock lock_; + Cond cond_; + volatile uint32_t serial_; // Counter of modifications. + volatile uint32_t last_; // Value of counter when last checked. 
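+  // serial_/last_ form a cheap change check: callers bump serial_ via incSerial() while
+  // holding getLock(), and process() rebuilds fdSet_ whenever last_ != serial_.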
+ fd_set fdSet_; + SYSvector socketIds_; + my_socket maxSocketId_; + DnsBuffers buffers_; + Lock bufferLock_; + DnsRequests requests_; + + char staticBuffer_[65536]; + SocketAddress from_; +#ifdef _WIN32 + WSABUF msgBuffers_[2]; +#else + struct msghdr msgheader_; + struct iovec msgBuffers_[2]; +#endif + + static Dns* dns_; + +protected: + + bool process() override; + + bool notifyStop() override; + + bool deleteAfterExit() override { + return false; + } + +public: + + static void initialize() _THROW_(SparrowException); + + static void shutdown() { + if (dns_ != 0) { + dns_->stop(); + delete dns_; + dns_ = 0; + } + } + + Dns(); + + ~Dns() { + } + + static void releaseBuffers(DnsBuffers& buffers) { + Guard guard(dns_->bufferLock_); + DnsBuffers& dnsBuffers = dns_->buffers_; + while (!buffers.isEmpty()) { + dnsBuffers.append(buffers.removeAt(0)); + } + while (dnsBuffers.entries() > MAX_DNS_BUFFERS) { + delete dnsBuffers.removeAt(0); + } + } + + static Lock& getLock() { + return dns_->lock_; + } + + static void incSerial() { + dns_->serial_++; + } + + static DnsCacheEntry* findEntry(uint32_t id, DnsConfiguration* configuration) { + return dns_->requests_.checkEntry(id, configuration); + } + + static void setRequest(DnsConfiguration* configuration, DnsCacheEntry* entry) { + dns_->requests_.set(configuration, entry); + } + + static void clearRequest(DnsCacheEntry* entry) { + dns_->requests_.clear(entry->getRequestId()); + } +}; + +} + +#endif /* #ifndef _dns_dns_h_ */ diff --git a/storage/sparrow/dns/dnscache.cc b/storage/sparrow/dns/dnscache.cc new file mode 100644 index 000000000000..8369e67d8255 --- /dev/null +++ b/storage/sparrow/dns/dnscache.cc @@ -0,0 +1,194 @@ +/* + DNS cache. +*/ + +#include "../handler/plugin.h" // For configuration parameters. +#include "dnscache.h" +#include "dns.h" +#include "dnsnet.h" +#include "../functions/ipaddress.h" + +namespace Sparrow { + +using namespace IvFunctions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +DnsCache::DnsCache() : SYSpHash(16384) { + Atomic::inc32(&SparrowStatus::get().dnsCaches_); +} + +DnsCache::~DnsCache() { + clear(); + Atomic::dec32(&SparrowStatus::get().dnsCaches_); +} + +void DnsCache::clear() { + int64_t size = getSize(); + SYSpHashIterator iterator(*this); + while (++iterator) { + size += iterator.key()->getSize(); + } + clearAndDestroy(); + changeSize(-size); +} + +void DnsCache::insert(DnsCacheEntry* entry) { + const int64_t before = getSize(); + SYSpHash::insert(entry); + changeSize(getSize() + entry->getSize() - before); +} + +DnsCacheEntry* DnsCache::remove(const DnsCacheEntry* entry) { + const int64_t before = getSize(); + DnsCacheEntry* removedEntry = SYSpHash::remove(entry); + changeSize(getSize() - before - entry->getSize()); + return removedEntry; +} + +// Remove from the DnsCacheEntry from the pending list, either because the DNS resolution succeeded or because it failed (decoding error or a timeout) +// and put the DnsCacheEntry back in the main cache. +void DnsCache::putBack(DnsCacheEntry* entry) { + if (entry->isPending()) { + pending_.remove(entry); + entry->pending(false); + entry->setSent(0); + Dns::clearRequest(entry); + Atomic::dec64(&SparrowStatus::get().dnsCachePendingEntries_); + lru_.append(entry); // Put back in lru_ as it had been removed when it became pending. 
+ } +} + +DnsCacheEntry* DnsCache::doResolve(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, const int id, + const uint8_t* address, const uint32_t length) { + if (length != 4 && length != 16) { + return 0; + } + Atomic::inc64(&SparrowStatus::get().dnsCacheAcquires_); + DnsCacheEntry key(id, address, length); + DnsCacheEntry* found = find(&key); + if (found != 0) { + Atomic::inc64(&SparrowStatus::get().dnsCacheHits_); + } + if (found == 0) { + DnsCacheEntry* entry = new DnsCacheEntry(id, address, length); + insert(entry); + Atomic::inc64(&SparrowStatus::get().dnsCacheEntries_); + lru_.append(entry); + if (!doRequest(configuration, now, mnow, entry, false)) { + found = entry; + } + } else if (found->isPending()) { + found = 0; + } else if (found->hasExpired(now)) { + if (doRequest(configuration, now, mnow, found, false)) { + found = 0; + } + } else { + // Valid entry found: put it to the LRU top. + lru_.remove(found); + lru_.append(found); + } + return found; +} + +// Send the DNS resolution request. Returns true if the given entry is pending. +bool DnsCache::doRequest(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, + DnsCacheEntry* entry, const bool uponError) { + // If it's a re-try because the previous response could not be decoded correctly + // and the request cannot be sent again (for any reason), then remove this request from the pending list. + if (uponError) { + if (!configuration->sendRequest(now, mnow, entry, true)) { + putBack(entry); + return false; + } + } else { + assert(!entry->isPending()); + if (configuration->sendRequest(now, mnow, entry, false)) { + lru_.remove(entry); // Remove from lru_ as a DnsCacheEntry cannot be in both lists at once + entry->pending(true); + pending_.append(entry); + Atomic::inc64(&SparrowStatus::get().dnsCachePendingEntries_); + } else { + return false; + } + } + return true; +} + +void DnsCache::doRetry(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, + DnsCacheEntry* entry) { + doRequest(configuration, now, mnow, entry, true); +} + +// Called regularly to handle timeouts. +void DnsCache::processTimeout(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, const uint64_t timeout) { + // Check if we can retry or terminate pending requests, or send delayed requests. + DnsCacheEntry* entry = pending_.first(); + while (entry != 0) { + assert(entry->isPending()); + DnsCacheEntry* next = entry->next_; + if (entry->hasTimedOut(mnow, timeout) && !configuration->sendRequest(now, mnow, entry, false)) { + putBack(entry); + } + entry = next; + } +} + +// Called regularly to purge obsolete entries. +void DnsCache::processPurge(const uint64_t now, DnsCacheEntries& obsoleteEntries) { + DnsCacheEntry* entry = lru_.first(); + while (entry != 0) { + assert(!entry->isPending()); + DnsCacheEntry* next = entry->next_; + if (entry->hasExpired(now)) { + lru_.remove(entry); + remove(entry); + obsoleteEntries.append(entry); + } + entry = next; + } +} + +// Called regularly to control DNS cache size. +void DnsCache::processControlSize(DnsCacheEntries& entries) { + if (sparrow_max_dns_cache_size == 0) { + // No size limit. 
+ return; + } + const uint64_t total = Atomic::get64(&SparrowStatus::get().dnsCacheSize_); + if (total == 0) { + return; + } + const double ratio = static_cast(sparrow_max_dns_cache_size) / total; + const uint32_t maxEntries = static_cast(ratio * this->entries()); + while (lru_.entries() > maxEntries) { + DnsCacheEntry* entry = lru_.removeFirst(); + remove(entry); + entries.append(entry); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsCacheEntry +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Resolve entry: set name and TTL. +void DnsCacheEntry::resolve(const uint64_t now, const char* name, const int length, const uint32_t ttl) { + int64_t delta = -static_cast(getSize()); + name_ = Str(name, length); + DnsCache::changeSize(delta + getSize()); + timestamp_ = now + ttl; +} + +// Name not found, or no response from DNS server: the name is the address as a string. +void DnsCacheEntry::resolve(const uint64_t now, const uint32_t ttl) { + char buffer[128]; + IpAddress address(address_, v6_ ? 16 : 4); + const uint32_t length = address.print(buffer); + resolve(now, buffer, static_cast(length), ttl); // TTL is one hour. +} + +} diff --git a/storage/sparrow/dns/dnscache.h b/storage/sparrow/dns/dnscache.h new file mode 100644 index 000000000000..da4eceba6345 --- /dev/null +++ b/storage/sparrow/dns/dnscache.h @@ -0,0 +1,237 @@ +/* + DNS cache. +*/ + +#ifndef _dns_cache_h_ +#define _dns_cache_h_ + +#include "../engine/types.h" +#include "../engine/list.h" +#include "../handler/plugin.h" // For SparrowStatus. + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsCacheEntry +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* IP address and host name. +*/ + +class DnsCacheEntry : public SYSidlink { +private: + + // Identifier (of the list of DNS servers). + int id_; + + // IP address. + uint8_t address_[16]; + + // Name. + Str name_; + + // Is this an IPv6 address? + uint32_t v6_:1; + + // Is this entry pending? + // (The name has not yet been received from the DNS server.) + uint32_t pending_:1; + + // Is this entry delayed? + // (The request could not be sent because there was no available request id.) + uint32_t delayed_:1; + + // If this entry is pending, how many times the request has been sent. + uint32_t sent_:5; + + // If this entry is pending, on which DNS server was the request sent. This attribute + // enables cycling through all the DNS servers defined for the entry id. + uint32_t serverId_:8; + + // If this entry is pending, this gives the request id. + uint16_t requestId_:16; + + // Timestamp: timeout if this entry is pending, or expiration if this entry is in the cache. + // Used in several ways: + // 1) when the request has been sent, timestamp_ records the time at which it was sent. Used for requests timeout. + // 2) if a response has been received and correctly decoded, timestamp_ is the TTL as returned by the DNS server (time during which this name resolution will remain valid, probably several years). + // 3) if the DNS resolution failed, timestamp_ is the expiration time: the time during which the entry stays in the cache. + // Once it has been purged out, if this IP comes up again, a new DnsCacheEntry will be created and go through the whole cycle again. 
+ // So, in other words, timestamp_ represents the delay to wait before trying again to resolve that IP. + uint64_t timestamp_; + +private: + + DnsCacheEntry(const DnsCacheEntry& right); + DnsCacheEntry& operator = (const DnsCacheEntry& right); + +public: + + DnsCacheEntry(const int id, const uint8_t* address, const uint32_t length) + : id_(id), v6_(length == 16), pending_(false), delayed_(false), + sent_(0), serverId_(0), requestId_(0), timestamp_(0) { + assert(length <= sizeof(address_)); + memcpy(address_, address, length); + } + + ~DnsCacheEntry() { + } + + void resolve(const uint64_t now, const char* name, const int length, const uint32_t ttl); + + void resolve(const uint64_t now, const uint32_t ttl); + + int getId() const { + return id_; + } + + bool isPending() const { + return pending_; + } + + void pending(const bool flag) { + pending_ = flag; + } + + bool isDelayed() const { + return delayed_; + } + + void delayed(const bool flag) { + delayed_ = flag; + } + + uint32_t getSent() const { + return sent_; + } + + void setSent(const uint32_t sent) { + sent_ = sent; + } + + uint32_t getServerId() const { + return serverId_; + } + + void setServerId(const uint32_t serverId) { + serverId_ = serverId; + } + + uint32_t getRequestId() const { + return requestId_; + } + + void setRequestId(const uint32_t requestId) { + requestId_ = requestId; + } + + void setTimestamp(const uint64_t timestamp) { + timestamp_ = timestamp; + } + + bool hasExpired(const uint64_t now) const { + return now > timestamp_; + } + + bool hasTimedOut(const uint64_t mnow, const uint64_t timeout) const { + return mnow >= timestamp_ + timeout; + } + + const Str& getName() const { + return name_; + } + + Str& getName() { + return name_; + } + + const uint8_t* getAddress() const { + return address_; + } + + bool isV6() const { + return v6_; + } + + bool operator == (const DnsCacheEntry& right) const { + return id_ == right.id_ && v6_ == right.v6_ && memcmp(address_, right.address_, v6_ ? 16 : 4) == 0; + } + + uint32_t hash() const { + uint32_t h = 1; + uint32_t i = v6_ ? 16 : 4; + while (i-- > 0) { + h = 31 * h + address_[i]; + } + h = 31 * h + id_; + return h; + } + + uint32_t getSize() const { + return sizeof(*this) + name_.length(); + } +}; + +typedef SYSidlist DnsCacheEntries; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* The SYSpHash contains all cache entries. +*/ +class DnsConfiguration; +class DnsCache : private SYSpHash { +private: + + // Entries waiting for a name resolution. Double linked list of pointers to the cache entries waiting for a reply from a DNS server + DnsCacheEntries pending_; + + // Cache LRU for eviction. Cache entries at the beginning have not been accessed for a long time and therefore are candidates for the purge. + // Each time a cache entry is used, it's put back to the end of the list. New cache entries are also put at the end. + // Entries in this list cannot also be in the pending_ list: pending requests cannot be purged out. 
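+  // processPurge() walks this list to drop expired entries; processControlSize() evicts from
+  // the head (oldest first) until the cache size is roughly within sparrow_max_dns_cache_size.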
+ DnsCacheEntries lru_; + +private: + + void purge(DnsCacheEntries& entries); + +public: + + DnsCache(); + + ~DnsCache(); + + void processTimeout(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, const uint64_t timeout); + + void processPurge(const uint64_t now, DnsCacheEntries& obsoleteEntries); + + void processControlSize(DnsCacheEntries& entries); + + void putBack(DnsCacheEntry* entry); + + void insert(DnsCacheEntry* entry); + + DnsCacheEntry* remove(const DnsCacheEntry* entry); + + DnsCacheEntry* doResolve(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, const int id, + const uint8_t* address, const uint32_t length); + + bool doRequest(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, + DnsCacheEntry* entry, const bool uponError); + + void doRetry(DnsConfiguration* configuration, const uint64_t now, const uint64_t mnow, + DnsCacheEntry* entry); + + void clear(); + + static void changeSize(const int64_t delta) { + if (delta != 0) { + Atomic::add64(&SparrowStatus::get().dnsCacheSize_, delta); + } + } +}; + +} + +#endif /* #ifndef _dns_cache_h_ */ diff --git a/storage/sparrow/dns/dnsconfiguration.cc b/storage/sparrow/dns/dnsconfiguration.cc new file mode 100644 index 000000000000..a65ef0c02531 --- /dev/null +++ b/storage/sparrow/dns/dnsconfiguration.cc @@ -0,0 +1,265 @@ +/* + DNS configuration and API. +*/ + +#include "dnsconfiguration.h" +#include "dnsnet.h" +#include "dns.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsConfiguration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This is the DNS configuration for one or several master files. For each listed DNS identifier, it gives a set +// of DNS servers. + +SYSpHash DnsConfiguration::hash_(16); +Lock DnsConfiguration::hashLock_(true, "DnsConfiguration::hashLock_"); +const DnsConfigId DnsConfigId::DEFAULT(-1); + +DnsConfiguration::DnsConfiguration() : SYShash(256), RefCounted(), lock_(false, DnsConfiguration::getName().c_str()), cache_(0), masterFiles_(0) { +} + +DnsConfiguration::DnsConfiguration(const DnsConfiguration& right) + : SYShash(256), RefCounted(), lock_(false, DnsConfiguration::getName().c_str()), cache_(0), masterFiles_(0) { + SYShash::operator = (right); +} + +// STATIC +Str DnsConfiguration::getName() { + char tmp[128]; + static volatile uint32_t counter = 0; + snprintf(tmp, sizeof(tmp), "DnsConfiguration(%u)::lock_", Atomic::inc32(&counter)); + return Str(tmp); +} + +DnsConfiguration::~DnsConfiguration() { + if (cache_ != 0) { + delete cache_; + Guard guard(Dns::getLock()); + Dns::incSerial(); + } +} + +// Start DNS server connections. 
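+// Every server of every configured id is started; servers that fail to start are logged and
+// dropped, and ids left with no usable server are removed. The DNS cache and the periodic
+// tasks (timeout, purge, size control) are then created, and the listener is told to rebuild
+// its socket set via incSerial().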
+void DnsConfiguration::start() _THROW_(SparrowException) { + SPARROW_ENTER("DnsConfiguration::start"); + assert(cache_ == 0); + SYShashIterator iterator(*this); + SYSslist toRemove; + while (++iterator) { + DnsServers& servers = iterator.key().getServers(); + for (uint32_t i = 0; i < servers.length(); ++i) { + try { + servers[i].start(); + } catch(const SparrowException& e) { + e.toLog(); + servers.removeAt(i--); + } + } + if (servers.isEmpty()) { + toRemove.append(iterator.key()); + } + } + while (!toRemove.isEmpty()) { + remove(toRemove.first()); + toRemove.removeFirst(); + } + cache_ = new DnsCache(); + tasks_.append(new DnsTimeoutTask(this)); + tasks_.append(new DnsPurgeTask(this)); + tasks_.append(new DnsSizeTask(this)); + for (uint32_t i = 0; i < tasks_.length(); ++i) { + Scheduler::addTask(tasks_[i]); + } + Guard guard(Dns::getLock()); + Dns::incSerial(); +} + +DnsCacheEntry* DnsConfiguration::doResolve(const uint64_t now, const uint64_t mnow, const int id, const uint8_t* address, const uint32_t length) { + SPARROW_ENTER("DnsConfiguration::doResolve"); + if (find(DnsConfigId(id)) == 0) { + return cache_->doResolve(this, now, mnow, -1, address, length); + } else { + return cache_->doResolve(this, now, mnow, id, address, length); + } +} + +// Tries to send a DNS request for the given DNS cache entry. The uponError flag tells the method +// if we retry a request following an error in the response. +// Returns true if the request is now pending, false if we had to perform a default resolution. +bool DnsConfiguration::sendRequest(const uint64_t now, const uint64_t mnow, DnsCacheEntry* entry, const bool uponError) { + SPARROW_ENTER("DnsConfiguration::sendRequest"); + + // Actually send or retry the request if: + // - The entry identifier exists. + // - And we have not reached the maximum number of retries. + // - And the uponError flag is false or the request as been sent less times than + // the number of servers (i.e. there is a chance this error does not occur on another server). + // Otherwise, use default resolution with a variable TTL. + DnsConfigId* configId = find(DnsConfigId(entry->getId())); + if (configId == 0) { + configId = find(DnsConfigId::DEFAULT); + } + const uint32_t sent = entry->getSent(); + if (configId != 0 && sent <= sparrow_dns_retries + && (!uponError || sent < configId->getServers().length())) { + DnsServers& servers = configId->getServers(); + const uint32_t serverId = (sent == 0 ? configId->getCurrent() : entry->getServerId() + 1) % servers.length(); + entry->setServerId(serverId); + entry->setSent(sent + 1); + entry->setTimestamp(mnow); + try { + if (sent == 1 || entry->isDelayed()) { + // Set the outstanding request only if this is not a retry. + Dns::setRequest(this, entry); + if (entry->isDelayed()) { + // No request id available: will send later. + return true; + } + } + uint8_t buffer[1024]; + const uint32_t length = DnsNet::forgeQuery(buffer, sizeof(buffer), entry->getRequestId(), + true, entry->getAddress(), entry->isV6()); + assert(length <= sizeof(buffer)); + DnsServerConnection& connection = servers[serverId].getConnection(); + connection.send(buffer, length); + Atomic::inc64(&SparrowStatus::get().dnsRequests_); + if (sent > 1) { + Atomic::inc64(&SparrowStatus::get().dnsRetries_); + } + return true; + } catch(const SparrowException&) { + entry->resolve(now, 3600); + return false; + } + } else { + if (uponError || configId == 0 || configId->getServers().isEmpty()) { + // TTL = 1 hour in case of error: it is unlikely we get a good response soon. 
+ entry->resolve(now, 3600); + } else { + // TTL = 1 minute in case of timeout: the DNS server(s) is(are) probably momentarily + // overloaded, so retry soon. + entry->resolve(now, 60); + } + return false; + } +} + +// Decode the DNS response. If it succeeded, remove the DnsCacheEntry from the pending list and out it back in the main cache. +// Otherwise, retry sending the DNS resolution request. +void DnsConfiguration::decodeResponse(uint64_t now, uint64_t mnow, const DnsBuffer& buffer, DnsCacheEntry* entry) { + SPARROW_ENTER("DnsConfiguration::decodeResponse"); + // In some rare situations, the DnsCacheEntry may not be pending anymore: if the first request timed out and a new request was issued. + // But the response to the first request finally arrived and was processed, the DnsCacheEntry was removed from the pending list and put back in the main cache. + if (!entry->isPending()) { + Atomic::inc64(&SparrowStatus::get().dnsDiscardedResponses3_); + return; + } + if (DnsNet::decodeResponse(now, buffer.getData(), buffer.getLength(), *entry)) { + cache_->putBack(entry); + } else { + // We get an error: retry or terminate. + cache_->doRetry(this, now, mnow, entry); + } +} + +// Called every time the DNS timeout expires. +void DnsConfiguration::processTimeout(const uint64_t now, const uint64_t mnow, const uint64_t timeout) { + SPARROW_ENTER("DnsConfiguration::processTimeout"); + cache_->processTimeout(this, now, mnow, timeout); +} + +void DnsConfiguration::processPurge(const uint64_t now, DnsCacheEntries& obsoleteEntries) { + SPARROW_ENTER("DnsConfiguration::processPurge"); + cache_->processPurge(now, obsoleteEntries); +} + +void DnsConfiguration::processControlSize(DnsCacheEntries& entries) { + SPARROW_ENTER("DnsConfiguration::processControlSize"); + cache_->processControlSize(entries); +} + +Str DnsConfiguration::print() { + Str s; + SYShashIterator iterator(*this); + while (++iterator) { + const DnsConfigId& configId = iterator.key(); + char buffer[2048]; + snprintf(buffer, sizeof(buffer), "\n%d: ", configId.getId()); + s += Str(buffer); + const DnsServers& servers = configId.getServers(); + for (uint32_t i = 0; i < servers.length(); ++i) { + snprintf(buffer, sizeof(buffer), i == 0 ? 
"(%s)" : ", (%s)", servers[i].print().c_str()); + s += Str(buffer); + } + } + return s; +} + +// STATIC +DnsConfiguration* DnsConfiguration::acquire(const DnsConfiguration& key) { + SPARROW_ENTER("DnsConfiguration::acquire"); + Guard guard(hashLock_); + DnsConfiguration* dnsConfiguration = hash_.find(&key); + if (dnsConfiguration == 0) { + dnsConfiguration = new DnsConfiguration(key); + dnsConfiguration->start(); + hash_.insert(dnsConfiguration); + } + dnsConfiguration->masterFiles_++; + return dnsConfiguration; +} + +// STATIC +void DnsConfiguration::release(DnsConfiguration* dnsConfiguration) { + SPARROW_ENTER("DnsConfiguration::release"); + assert(dnsConfiguration->isStarted()); + Guard guard(hashLock_); + if (--dnsConfiguration->masterFiles_ == 0) { + DBUG_PRINT("sparrow_dns", ("DNS configuration has zero reference")); + hash_.remove(dnsConfiguration); + for (uint32_t i = 0; i < dnsConfiguration->tasks_.length(); ++i) { + dnsConfiguration->tasks_[i]->stop(); + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsPurgeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void DnsPurgeTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + DnsCacheEntries obsoleteEntries; + { + Guard guard(dnsConfiguration_->getLock()); + dnsConfiguration_->processPurge(std::time(nullptr), obsoleteEntries); + } + Atomic::add64(&SparrowStatus::get().dnsCacheEntries_, -static_cast(obsoleteEntries.entries())); + while (!obsoleteEntries.isEmpty()) { + DnsCacheEntry* entry = obsoleteEntries.removeFirst(); + delete entry; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsSizeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void DnsSizeTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + DnsCacheEntries entries; + { + Guard guard(dnsConfiguration_->getLock()); + dnsConfiguration_->processControlSize(entries); + } + Atomic::add64(&SparrowStatus::get().dnsCacheEvictions_, static_cast(entries.entries())); + Atomic::add64(&SparrowStatus::get().dnsCacheEntries_, -static_cast(entries.entries())); + while (!entries.isEmpty()) { + DnsCacheEntry* entry = entries.removeFirst(); + delete entry; + } +} + +} diff --git a/storage/sparrow/dns/dnsconfiguration.h b/storage/sparrow/dns/dnsconfiguration.h new file mode 100644 index 000000000000..e0378d42c135 --- /dev/null +++ b/storage/sparrow/dns/dnsconfiguration.h @@ -0,0 +1,279 @@ +/* + DNS configuration. +*/ + +#ifndef _dns_dnsconfiguration_h_ +#define _dns_dnsconfiguration_h_ + +#include "dnsserver.h" +#include "../handler/plugin.h" // For configuration parameters. + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsConfigId +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* A DNS configuration is a group of DNS servers to use for IP resolution. +*/ + +class DnsConfigId { + friend ByteBuffer& operator >> (ByteBuffer& buffer, DnsConfigId& id); + friend ByteBuffer& operator << (ByteBuffer& buffer, const DnsConfigId& id); + +private: + + int id_; // DNS identifier. + bool key_; // Is this configId a search key? + DnsServers servers_; // DNS servers. + mutable RefCounter current_; // Current DNS server (round robin). 
+ +public: + + static const DnsConfigId DEFAULT; + +public: + + DnsConfigId() : id_(-1), key_(false) { + } + + DnsConfigId(int id) : id_(id), key_(true) { + } + + DnsConfigId(int id, const DnsServers& servers) : id_(id), key_(false), servers_(servers) { + } + + int getId() const { + return id_; + } + + bool operator == (const DnsConfigId& right) const { + return id_ == right.id_ && (key_ || right.key_ || servers_ == right.servers_); + } + + const DnsServers& getServers() const { + return servers_; + } + + DnsServers& getServers() { + return servers_; + } + + uint32_t getCurrent() const { + return current_++; + } + + uint32_t hash() const { + return id_; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const DnsConfigId& id) { + buffer << id.id_ << id.servers_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, DnsConfigId& id) { + buffer >> id.id_ >> id.servers_; + + // Replace "@default" servers by the list of default DNS servers. + DnsServers extendedServers; + for (uint32_t i = 0; i < id.servers_.length(); ++i) { + const DnsServer& server = id.servers_[i]; + if (strcmp(server.getHost(), "@default") == 0) { + for (int j = 0; j < DnsDefault::getNumber(); ++j) { + extendedServers.append(DnsServer(DnsDefault::getServer(j), server.getPort(), + server.getSourceAddress(), server.getSourcePort())); + } + } else { + extendedServers.append(server); + } + } + id.servers_ = extendedServers; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsConfiguration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsBuffer; +class DnsTimeoutTask; +class DnsPurgeTask; +class DnsCache; +class DnsCacheEntry; +typedef SYSidlist DnsCacheEntries; +class DnsConfiguration : public SYShash, public RefCounted { + + friend class DnsTimeoutTask; + +private: + + Lock lock_; + DnsCache* cache_; + + // Number of master files currently referencing this configuration. + uint32_t masterFiles_; + + // Tasks. 
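+  // (Periodic tasks scheduled by start(): timeout processing, cache purge and cache size control;
+  // see DnsTimeoutTask, DnsPurgeTask and DnsSizeTask below.)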
+ SYSpVector tasks_; + + static SYSpHash hash_; + static Lock hashLock_; + +private: + + static Str getName(); + +public: + + DnsConfiguration(); + + ~DnsConfiguration(); + + DnsConfiguration(const DnsConfiguration& right); + + Lock& getLock() { + return lock_; + } + + void start() _THROW_(SparrowException); + + bool isStarted() const { + return cache_ != 0; + } + + DnsCacheEntry* doResolve(const uint64_t now, const uint64_t mnow, const int id, const uint8_t* address, const uint32_t length); + + bool sendRequest(const uint64_t now, const uint64_t mnow, DnsCacheEntry* entry, const bool uponError); + + void decodeResponse(uint64_t now, uint64_t mnow, const DnsBuffer& buffer, DnsCacheEntry* entry); + + void processTimeout(const uint64_t now, const uint64_t mnow, const uint64_t timeout); + + void processPurge(const uint64_t now, DnsCacheEntries& obsoleteEntries); + + void processControlSize(DnsCacheEntries& entries); + + bool operator == (const DnsConfiguration& right) const { + return SYShash::operator == (right); + } + + uint32_t hash() const { + uint32_t h = 0; + SYShashIterator iterator(*this); + while (++iterator) { + h = h + iterator.key().hash(); + } + return h; + } + + Str print(); + + static DnsConfiguration* acquire(const DnsConfiguration& key); + + static void release(DnsConfiguration* dnsConfiguration); + +private: + + DnsConfiguration& operator = (const DnsConfiguration& right); +}; + +typedef RefPtr DnsConfigurationGuard; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsTimeoutTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsTimeoutTask : public Task { +private: + + DnsConfigurationGuard dnsConfiguration_; + +public: + + DnsTimeoutTask(DnsConfiguration* dnsConfiguration) + : Task(Worker::getQueue()), dnsConfiguration_(dnsConfiguration) { + } + + virtual bool operator == (const DnsTimeoutTask& right) const { + return *dnsConfiguration_ == *right.dnsConfiguration_; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return sparrow_dns_timeout; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException) { + Guard guard(dnsConfiguration_->getLock()); + dnsConfiguration_->processTimeout(std::time(nullptr), my_micro_time(), sparrow_dns_timeout * 1000); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsPurgeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsPurgeTask : public Task { +private: + + DnsConfigurationGuard dnsConfiguration_; + +public: + + DnsPurgeTask(DnsConfiguration* dnsConfiguration) + : Task(Worker::getQueue()), dnsConfiguration_(dnsConfiguration) { + } + + virtual bool operator == (const DnsPurgeTask& right) const { + return *dnsConfiguration_ == *right.dnsConfiguration_; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 300000; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsSizeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsSizeTask : public Task { +private: + + DnsConfigurationGuard 
dnsConfiguration_; + +public: + + DnsSizeTask(DnsConfiguration* dnsConfiguration) + : Task(Worker::getQueue()), dnsConfiguration_(dnsConfiguration) { + } + + virtual bool operator == (const DnsSizeTask& right) const { + return *dnsConfiguration_ == *right.dnsConfiguration_; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 5000; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _dns_dnsconfiguration_h_ */ diff --git a/storage/sparrow/dns/dnsdefault.cc b/storage/sparrow/dns/dnsdefault.cc new file mode 100644 index 000000000000..ca952f034ce9 --- /dev/null +++ b/storage/sparrow/dns/dnsdefault.cc @@ -0,0 +1,80 @@ +/* + Default DNS server. +*/ + +#include "dnsdefault.h" + +#ifdef _WIN32 +#include +#include +#elif defined(__linux__) || defined(__MACH__) +#include +#include +#include +#include +#include +#include +#include +#elif defined(__SunOS) +#include +#include +#include +#include +#include +#include +#include +#endif +#include +#include + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsDefault +////////////////////////////////////////////////////////////////////////////////////////////////////// + +int DnsDefault::n_ = 0; +char** DnsDefault::servers_ = DnsDefault::initialize(); + +// STATIC +char** DnsDefault::initialize() { +#ifdef _WIN32 // Windows. + BYTE buffer[16384]; + memset(buffer, 0, sizeof(buffer)); + DWORD size = sizeof(buffer); + DNS_STATUS status = DnsQueryConfig(DnsConfigDnsServerList, 0, 0, 0, buffer, &size); + IP4_ARRAY& response = *(IP4_ARRAY*)(buffer); + if (status == 0 && response.AddrCount > 0) { + n_ = response.AddrCount; + servers_ = new char*[n_ ]; + for (int i = 0; i < n_; ++i) { + IP4_ADDRESS address = response.AddrArray[i]; + servers_[i] = new char[16]; + _snprintf(servers_[i], 16, "%u.%u.%u.%u", address & 0xff, (address >> 8) & 0xff, + (address >> 16) & 0xff, (address >> 24) & 0xff); + } + } + // TODO IPv6 - http://groups.google.com/group/microsoft.public.platformsdk.networking.ipv6/browse_frm/thread/0ce031e8b4ee2fd9?hl=en# +#elif defined(__linux__) || defined(__SunOS) || defined(__MACH__) // Linux, Solaris and MacOS. + struct __res_state* resState = reinterpret_cast(new unsigned char[sizeof(struct __res_state)]); + memset(resState, 0, sizeof(struct __res_state)); + if (res_ninit(resState) == 0 && resState->nscount > 0) { + n_ = resState->nscount; + servers_ = new char*[n_ ]; + for (int i = 0; i < n_; ++i) { + struct in_addr address = resState->nsaddr_list[i].sin_addr; + servers_[i] = new char[INET6_ADDRSTRLEN]; + + // Try IPv6 if IPv4 fails. + if (inet_ntop(AF_INET, &address, servers_[i], INET6_ADDRSTRLEN) == 0) { + inet_ntop(AF_INET6, &address, servers_[i], INET6_ADDRSTRLEN); + } + } + } +#else +#error Missing DnsDefault for this platform. +#endif + return n_ == 0 ? 0 : servers_; +} + +} diff --git a/storage/sparrow/dns/dnsdefault.h b/storage/sparrow/dns/dnsdefault.h new file mode 100644 index 000000000000..3a6df7b6234f --- /dev/null +++ b/storage/sparrow/dns/dnsdefault.h @@ -0,0 +1,37 @@ +/* + Default DNS server. 
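+
+  Exposes the DNS servers configured on the host system, discovered once at static initialization
+  time (DnsQueryConfig() on Windows, res_ninit() on the other supported platforms).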
+*/ + +#ifndef _dns_default_h_ +#define _dns_default_h_ + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsDefault +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsDefault { +private: + + static char** servers_; + static int n_; + +private: + + static char** initialize(); + +public: + + static int getNumber() { + return n_; + } + + static const char* getServer(int i) { + return servers_[i]; + } +}; + +} + +#endif /* #ifndef _dns_default_h_ */ diff --git a/storage/sparrow/dns/dnsnet.cc b/storage/sparrow/dns/dnsnet.cc new file mode 100644 index 000000000000..9b1c89bf1812 --- /dev/null +++ b/storage/sparrow/dns/dnsnet.cc @@ -0,0 +1,386 @@ +/* + DNS network utilities. + See http://www.tcpipguide.com/free/t_DNSMessageHeaderandQuestionSectionFormat.htm + for more information about DNS message format. +*/ + +#include "dnsnet.h" +#include "dnscache.h" +#include "../functions/ipaddress.h" + +#define MYSQL_SERVER 1 +//#include +#include "sql/query_options.h" // For mysqld options. +#include "../engine/log.h" +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace Sparrow { + +using namespace IvFunctions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsNet +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const char* DnsNet::nibbles_ = "0123456789abcdef"; + +// To write IPv4 address CNAME without sprintf. +const char* DnsNet::format_ = "" +"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222" +"0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999000000000011111111112222222222333333333344444444445555555555666666666677777777778888888888999999999900000000001111111111222222222233333333334444444444555555" +"0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345"; + + +const char DnsNet::ipV4Suffix_[] = { '\x07', 'i', 'n', '-', 'a', 'd', 'd', 'r', '\x04', 'a', 'r', 'p', 'a', '\x00' }; + +const char DnsNet::ipV6Suffix_[] = { '\x03', 'i', 'p', '6', '\x04', 'a', 'r', 'p', 'a', '\x00' }; + +// STATIC +uint32_t DnsNet::forgeQuery(uint8_t* buffer, uint32_t length, uint16_t id, bool recurse, + const uint8_t* address, bool v6) { + // Check length. + // - 12 bytes for the header. + // - 4 + 2 + 28 bytes max for the IPv4 PTR record. + // - 4 + 2 + 72 bytes for the IPv6 PTR record. + // -> Total max length is 90 bytes. + if (length < 90) { + return 0; + } + uint8_t* saved = buffer; + + // Write header info. + + // Identifier. + *buffer++ = static_cast(id >> 8); + *buffer++ = static_cast(id & 0xff); + + // Flags. + uint16_t flags = recurse ? 0x100 : 0; + *buffer++ = static_cast(flags >> 8); + *buffer++ = static_cast(flags & 0xff); + + // Number of questions. + *buffer++ = 0; + *buffer++ = 1; + + // Number of answer RRs, authority RRs and additional RRs are all null. + memset(buffer, 0, 6); + buffer += 6; + + // CNAME. 
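+  // This is the reverse-lookup name of the address, written as length-prefixed labels: for
+  // example, an IPv4 address a.b.c.d becomes "d.c.b.a.in-addr.arpa" (see encodeCName()).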
+ buffer += encodeCName(buffer, address, v6); + + // PTR type. + *buffer++ = 0; + *buffer++ = static_cast(0xc); + + // IN class. + *buffer++ = 0; + *buffer++ = 1; + return static_cast(buffer - saved); +} + +// STATIC +bool DnsNet::decodeResponse(uint64_t now, const uint8_t* buffer, uint32_t length, DnsCacheEntry& entry) { + uint8_t tmp[256]; + uint8_t tmp2[256]; + const uint8_t* ref = buffer; + + // This has been checked before. + assert(length > 12); + const uint32_t maxOffset = length - 1; + + // Skip identifier. + buffer += 2; + + // Check flags: need a response without error. + uint16_t flags = *buffer++ << 8; + flags |= *buffer++; + if ((flags & 0x800f) != 0x8000) { + switch (flags & 0xf) { + case 1: Atomic::inc64(&SparrowStatus::get().dnsErrorsFormat_); break; + case 2: Atomic::inc64(&SparrowStatus::get().dnsErrorsFailure_); break; + case 3: Atomic::inc64(&SparrowStatus::get().dnsErrorsName_); break; + case 4: Atomic::inc64(&SparrowStatus::get().dnsErrorsNotImplemented_); break; + case 5: Atomic::inc64(&SparrowStatus::get().dnsErrorsRefused_); break; + case 6: Atomic::inc64(&SparrowStatus::get().dnsErrorsYXDomain_); break; + case 7: Atomic::inc64(&SparrowStatus::get().dnsErrorsYXRRSet_); break; + case 8: Atomic::inc64(&SparrowStatus::get().dnsErrorsNXRRSet_); break; + case 9: Atomic::inc64(&SparrowStatus::get().dnsErrorsNotAuth_); break; + case 10: Atomic::inc64(&SparrowStatus::get().dnsErrorsNotZone_); break; + default: Atomic::inc64(&SparrowStatus::get().dnsErrorsUnknown_); break; + } + return false; + } + + // Need one question and at least one answer. + uint16_t questions = *buffer++ << 8; + questions |= *buffer++; + if (questions != 1) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + uint16_t answers = *buffer++ << 8; + answers |= *buffer++; + if (answers == 0) { + Atomic::inc64(&SparrowStatus::get().dnsNoAnswer_); + return false; + } + + // Skip Authority RRs and additional RRs. + buffer += 4; + length -= 12; + + // Skip query CNAME. + uint32_t nameLength = decodeCName(ref, static_cast(buffer - ref), maxOffset, tmp, sizeof(tmp) - 1); + if (nameLength == 0) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + buffer += nameLength; + assert(length > nameLength); + length -= nameLength; + + // Check query is a PTR record with IN class. + if (length < 4) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + length -= 4; + uint16_t type = *buffer++ << 8; + type |= *buffer++; + uint16_t klass = *buffer++ << 8; + klass |= *buffer++; + if (type != 0x0c || klass != 1) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + + // Get first PTR record and check address. + uint8_t address[80]; + const uint32_t addressLength = encodeCName(address, entry.getAddress(), entry.isV6()); + assert(addressLength <= sizeof(address)); + const uint32_t addressLength2 = decodeCName(address, 0, addressLength - 1, tmp2, sizeof(tmp2) - 1); + if (addressLength2 == 0) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + nameLength = decodeCName(ref, static_cast(buffer - ref), maxOffset, tmp, sizeof(tmp) - 1); + if (nameLength == 0) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + buffer += nameLength; + assert(length > nameLength); + length -= nameLength; + if (memcmp(tmp, tmp2, addressLength2) != 0) { + // Address mismatch: this can occur in case of a late response + // for a request id reused meanwhile. 
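+    // Returning false here makes the caller retry the resolution for the entry that owns this
+    // request id (see DnsConfiguration::decodeResponse()).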
+ Atomic::inc64(&SparrowStatus::get().dnsDiscardedResponses4_); + return false; + } + + // Check answer is a PTR record with IN class. + if (length < 4) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + length -= 4; + type = *buffer++ << 8; + type |= *buffer++; + klass = *buffer++ << 8; + klass |= *buffer++; + if (type != 0x0c || klass != 1) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + + // Get TTL. + if (length < 4) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + length -= 4; + uint32_t ttl = *buffer++ << 24; + ttl |= *buffer++ << 16; + ttl |= *buffer++ << 8; + ttl |= *buffer++; + + // Skip data length. + if (length < 2) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + buffer += 2; + length -= 2; + + // Get name. + nameLength = decodeCName(ref, static_cast(buffer - ref), maxOffset, tmp, sizeof(tmp) - 1); + if (nameLength == 0) { + Atomic::inc64(&SparrowStatus::get().dnsErrorsDecoding_); + return false; + } + const char* name = reinterpret_cast(tmp); + entry.resolve(now, name, static_cast(strlen(name)), ttl); + return true; +} + +// STATIC +uint32_t DnsNet::decodeCName(const uint8_t* ref, uint32_t sourceOffset, const uint32_t maxSourceOffset, uint8_t* dest, const uint32_t maxDestOffset) { + uint32_t initial = sourceOffset; + uint32_t saved = 0; + uint32_t destOffset = 0; + uint32_t loop_counter = 0; // debug - counter to avoid infinite loop + uint32_t bckwd_ptr = 0; + for (;;) { + if (sourceOffset > maxSourceOffset) { + return 0; + } + if ( ++loop_counter > maxSourceOffset ) { + spw_print_information("[sparrow] DNS packet may be wrong: looped %d times, packet size %d, bckwd_ptr %d, sourceOffset %d, saved %d, initial %d", + loop_counter, maxSourceOffset, bckwd_ptr, sourceOffset, saved, initial); + return 0; + } + const uint8_t b = ref[sourceOffset++]; + if (b == 0) { + if (destOffset > maxDestOffset) { + return 0; + } + dest[destOffset++] = 0; + return (saved == 0 ? sourceOffset : saved) - initial; + } + const int check = b & 0xc0; + if (check == 0) { + const int length = (b & ~0xc0); + if (destOffset != 0) { + if (destOffset > maxDestOffset) { + return 0; + } + dest[destOffset++] = '.'; + } + for (int i = 0; i < length; ++i) { + if (sourceOffset > maxSourceOffset || destOffset > maxDestOffset) { + return 0; + } + dest[destOffset++] = ref[sourceOffset++]; + } + } else if (check == 0xc0) { // Pointer. + if (sourceOffset + 1 > maxSourceOffset) { + return 0; + } + const uint32_t newOffset = ref[sourceOffset++] + ((b & ~0xc0) << 8); + if ( newOffset < sourceOffset ) { + bckwd_ptr++; + } + if ( saved != 0 ) { + spw_print_information("[sparrow] DNS packet may be wrong: at %d pointer to offset %d, but we were already dereferencing a pointer located at %d", + sourceOffset, newOffset, saved); + } + saved = sourceOffset; + sourceOffset = newOffset; + } else { + return 0; + } + } +} + +// The buffer must have room for 80 bytes (max encoded length of an IPv6 address). 
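+// For IPv6, each of the 16 address bytes yields two single-character nibble labels (low nibble
+// first), written in reverse byte order and terminated by the "ip6.arpa" suffix. For IPv4, the
+// four octets are written in reverse order as decimal labels terminated by "in-addr.arpa".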
+// STATIC +uint32_t DnsNet::encodeCName(uint8_t* buffer, const uint8_t* address, const bool v6) { + const uint8_t* ref = buffer; + if (v6) { + for (int i = 15; i >= 0; i--) { + uint8_t b = address[i]; + *buffer++ = static_cast(1); + *buffer++ = nibbles_[b & 0xf]; + *buffer++ = static_cast(1); + *buffer++ = nibbles_[b >> 4]; + } + memcpy(buffer, ipV6Suffix_, 10); + buffer += 10; + } else { + int a = address[3]; + uint8_t x = format_[a]; + uint8_t y = format_[a + 256]; + uint8_t z = format_[a + 512]; + if (x != static_cast('0')) { + *buffer++ = static_cast(3); + *buffer++ = x; + *buffer++ = y; + *buffer++ = z; + } else if (y !=static_cast('0')) { + *buffer++ = static_cast(2); + *buffer++ = y; + *buffer++ = z; + } else { + *buffer++ = static_cast(1); + *buffer++ = z; + } + a = address[2]; + x = format_[a]; + y = format_[a + 256]; + z = format_[a + 512]; + if (x != static_cast('0')) { + *buffer++ = static_cast(3); + *buffer++ = x; + *buffer++ = y; + *buffer++ = z; + } else if (y != static_cast('0')) { + *buffer++ = static_cast(2); + *buffer++ = y; + *buffer++ = z; + } else { + *buffer++ = static_cast(1); + *buffer++ = z; + } + a = address[1]; + x = format_[a]; + y = format_[a + 256]; + z = format_[a + 512]; + if (x != static_cast('0')) { + *buffer++ = static_cast(3); + *buffer++ = x; + *buffer++ = y; + *buffer++ = z; + } else if (y != static_cast('0')) { + *buffer++ = static_cast(2); + *buffer++ = y; + *buffer++ = z; + } else { + *buffer++ = static_cast(1); + *buffer++ = z; + } + a = address[0]; + x = format_[a]; + y = format_[a + 256]; + z = format_[a + 512]; + if (x != static_cast('0')) { + *buffer++ = static_cast(3); + *buffer++ = x; + *buffer++ = y; + *buffer++ = z; + } else if (y != static_cast('0')) { + *buffer++ = static_cast(2); + *buffer++ = y; + *buffer++ = z; + } else { + *buffer++ = static_cast(1); + *buffer++ = z; + } + memcpy(buffer, ipV4Suffix_, 14); + buffer += 14; + } + return static_cast(buffer - ref); +} + + +} diff --git a/storage/sparrow/dns/dnsnet.h b/storage/sparrow/dns/dnsnet.h new file mode 100644 index 000000000000..231781bcd183 --- /dev/null +++ b/storage/sparrow/dns/dnsnet.h @@ -0,0 +1,41 @@ +/* + DNS network utilities. +*/ + +#ifndef _dns_net_h_ +#define _dns_net_h_ + +#include "../handler/plugin.h" // For configuration parameters. +#include "../engine/types.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsNet +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsCacheEntry; +class DnsNet { +private: + + static const char* nibbles_; + static const char* format_; + static const char ipV4Suffix_[]; + static const char ipV6Suffix_[]; + +private: + + static uint32_t decodeCName(const uint8_t* ref, uint32_t sourceOffset, const uint32_t maxSourceOffset, uint8_t* dest, const uint32_t maxDestOffset); + static uint32_t encodeCName(uint8_t* buffer, const uint8_t* address, const bool v6); + +public: + + static uint32_t forgeQuery(uint8_t* buffer, uint32_t length, uint16_t id, bool recurse, + const uint8_t* address, bool v6); + + static bool decodeResponse(uint64_t now, const uint8_t* buffer, uint32_t length, DnsCacheEntry& entry); +}; + +} + +#endif /* #ifndef _dns_net_h_ */ diff --git a/storage/sparrow/dns/dnsserver.cc b/storage/sparrow/dns/dnsserver.cc new file mode 100644 index 000000000000..e8f8db108ece --- /dev/null +++ b/storage/sparrow/dns/dnsserver.cc @@ -0,0 +1,193 @@ +/* + Connection to a DNS server. 
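+
+  A DnsSocket is a UDP socket bound to a given source address and port; a DnsServerConnection
+  pairs such a socket with the address of one DNS server. Both are reference counted and shared
+  through static hash dictionaries (acquire()/release()).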
+*/ + +#include "dnsserver.h" +#include "dns.h" + +#define MYSQL_SERVER 1 +//#include +#include "sql/query_options.h" // For mysqld options. +#include +//#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsSocket +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SYSpHash DnsSocket::hash_(256); + +DnsSocket::DnsSocket(const char* address, uint32_t port) _THROW_(SparrowException) + : socketId_(INVALID_SOCKET), address_(SocketUtil::getAddress(address, port)), references_(0) { + SPARROW_ENTER("DnsSocket::DnsSocket"); +} + +void DnsSocket::open() _THROW_(SparrowException) { + SPARROW_ENTER("DnsSocket::open"); + if (socketId_ == INVALID_SOCKET) { + socketId_ = SocketUtil::create(SOCK_DGRAM, address_); + setsockopt(socketId_, SOL_SOCKET, SO_RCVBUF, reinterpret_cast(&sparrow_socket_rcvbuf_size), sizeof(sparrow_socket_rcvbuf_size)); +#ifndef NDEBUG + const Str address = address_.print(); + DBUG_PRINT("sparrow_dns", ("Opening datagram socket %s: id is %u", address.c_str(), static_cast(socketId_))); +#endif + } +} + +DnsSocket::~DnsSocket() { + SPARROW_ENTER("DnsSocket::~DnsSocket"); + if (socketId_ != INVALID_SOCKET) { +#ifndef NDEBUG + const Str address = address_.print(); + DBUG_PRINT("sparrow_dns", ("Closing datagram socket %s: id is %u", address.c_str(), static_cast(socketId_))); +#endif + ::shutdown(socketId_, SHUT_RDWR); + closesocket(socketId_); + } +} + +// Gets a socket for a given (source address, source port) couple out of the dictionary. +// STATIC +DnsSocket* DnsSocket::acquire(const char* address, uint32_t port) _THROW_(SparrowException) { + SPARROW_ENTER("DnsSocket::acquire"); + DnsSocket key(address, port); + DnsSocket* socket = hash_.find(&key); + if (socket == 0) { + try { + socket = new DnsSocket(address, port); + } catch(const SparrowException& e) { + e.toLog(); + } + hash_.insert(socket); + } + socket->references_++; +#ifndef NDEBUG + const Str saddress = socket->address_.print(); + DBUG_PRINT("sparrow_dns", ("Datagram socket %s has now %u references", saddress.c_str(), socket->references_)); +#endif + return socket; +} + +// Releases a socket when it is no longer used. +// STATIC +void DnsSocket::release(DnsSocket* socket) { + SPARROW_ENTER("DnsSocket::release"); + --socket->references_; +#ifndef NDEBUG + const Str address = socket->address_.print(); + DBUG_PRINT("sparrow_dns", ("Datagram socket %s has now %u references", address.c_str(), socket->references_)); +#endif + if (socket->references_ == 0) { + delete hash_.remove(socket); + } +} + +// Helper to fill a set of socket ids. Returns the max socket id, to pass to the select() call. +// The stop socket is present in the fd set, but not in the socket vector. +// This way, stop data is properly ignored. 
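+// (The value returned is the highest socket id plus one, i.e. what select() expects as its
+// nfds argument.)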
+// STATIC +my_socket DnsSocket::fillFdSet(fd_set* fdSet, SYSvector& socketIds) { + SPARROW_ENTER("DnsSocket::fillFdSet"); + my_socket maxSocketId = INVALID_SOCKET; + FD_ZERO(fdSet); + socketIds.resize(hash_.entries()); + socketIds.forceLength(0); + SYSpHashIterator iterator(hash_); + bool first = true; + while (++iterator) { + DnsSocket& socket = *iterator.key(); + my_socket socketId = socket.get(); + FD_SET(socketId, fdSet); + socketIds.append(socketId); + if (first || socketId + 1 > maxSocketId) { + first = false; + maxSocketId = socketId + 1; + } + } + my_socket stopSocket = SocketUtil::getStopSocket(); + if (stopSocket != INVALID_SOCKET) { + FD_SET(stopSocket, fdSet); + if (first || stopSocket + 1 > maxSocketId) { + maxSocketId = stopSocket + 1; + } + } + return maxSocketId; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsServerConnection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Dictionary of DNS connections. +SYSpHash DnsServerConnection::hash_(256); + +// If this new DNS connection is not a search key, the socket is actually opened. +DnsServerConnection::DnsServerConnection(const DnsServer& dns, const bool key) _THROW_(SparrowException) + : address_(SocketUtil::getAddress(dns.getHost(), dns.getPort())), references_(0) { + SPARROW_ENTER("DnsServerConnection::DnsServerConnection"); + + // Acquire socket only after we are sure address_ is valid. + socket_ = DnsSocket::acquire(dns.getSourceAddress(), dns.getSourcePort()); + if (!key) { + socket_->open(); + } +} + +DnsServerConnection::~DnsServerConnection() { + SPARROW_ENTER("DnsServerConnection::~DnsServerConnection"); + DnsSocket::release(socket_); +} + +// Gets a connection to a DNS server out of the dictionary. +// STATIC +DnsServerConnection* DnsServerConnection::acquire(const DnsServer& dns) _THROW_(SparrowException) { + SPARROW_ENTER("DnsServerConnection::acquire"); + Guard guard(Dns::getLock()); + const DnsServerConnection key(dns, true); + DnsServerConnection* server = hash_.find(&key); + if (server == 0) { + server = new DnsServerConnection(dns, false); + hash_.insert(server); + Dns::incSerial(); + } + server->references_++; + return server; +} + +// Releases a connection to a DNS server. +// STATIC +void DnsServerConnection::release(DnsServerConnection* server) { + SPARROW_ENTER("DnsServerConnection::release"); + Guard guard(Dns::getLock()); + if (--server->references_ == 0) { + delete hash_.remove(server); + Dns::incSerial(); + } +} + +// Sends a packet to a DNS server. +void DnsServerConnection::send(const uint8_t* buffer, const uint32_t length) _THROW_(SparrowException) { + SPARROW_ENTER("DnsServerConnection::send"); + const my_socket socketId = socket_->get(); + uint32_t sent = 0; + while (sent < length) { + int result = sendto(socketId, reinterpret_cast(buffer) + sent, length - sent, 0, + address_.getSockAddr(), address_.getSockAddrLength()); + if (result == -1) { + throw SparrowException::create(true, "Cannot send UDP packet to %s", address_.print().c_str()); + } + sent += result; + } +} + +} diff --git a/storage/sparrow/dns/dnsserver.h b/storage/sparrow/dns/dnsserver.h new file mode 100644 index 000000000000..1497b735f088 --- /dev/null +++ b/storage/sparrow/dns/dnsserver.h @@ -0,0 +1,229 @@ +/* + Connection to a DNS server. 
+*/ + +#ifndef _dns_dnsserver_h_ +#define _dns_dnsserver_h_ + +#include "../engine/types.h" +#include "../engine/hash.h" +#include "../engine/socketutil.h" +#include "../engine/thread.h" +#include "../engine/scheduler.h" +#include "dnsdefault.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsSocket +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// UDP socket bound on a specific source address and source port. +class DnsSocket { +private: + + static SYSpHash hash_; + + my_socket socketId_; + SocketAddress address_; + uint32_t references_; + +public: + + DnsSocket(const char* address, uint32_t port) _THROW_(SparrowException); + + ~DnsSocket(); + + void open() _THROW_(SparrowException); + + static DnsSocket* acquire(const char* address, uint32_t port) _THROW_(SparrowException); + + static void release(DnsSocket* socket); + + bool operator == (const DnsSocket& right) const { + return address_ == right.address_; + } + + uint32_t hash() const { + return address_.hash(); + } + + my_socket get() { + return socketId_; + } + + static my_socket fillFdSet(fd_set* fdSet, SYSvector& socketIds); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsServerConnection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsServer; +class DnsServerConnection { +private: + + static SYSpHash hash_; + + DnsSocket* socket_; + SocketAddress address_; + uint32_t references_; + +public: + + DnsServerConnection(const DnsServer& dns, const bool key) _THROW_(SparrowException); + + ~DnsServerConnection(); + + static DnsServerConnection* acquire(const DnsServer& dns) _THROW_(SparrowException); + + static void release(DnsServerConnection* server); + + bool operator == (const DnsServerConnection& right) const { + return *socket_ == *right.socket_ && address_ == right.address_; + } + + uint32_t hash() const { + return socket_->hash() + address_.hash(); + } + + void send(const uint8_t* buffer, const uint32_t length) _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsServer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsServer { + friend ByteBuffer& operator >> (ByteBuffer& buffer, DnsServer& dns); + friend ByteBuffer& operator << (ByteBuffer& buffer, const DnsServer& dns); + +private: + + Str host_; + uint32_t port_; + Str sourceAddress_; + uint32_t sourcePort_; + DnsServerConnection* connection_; + +public: + + DnsServer() : port_(0), sourcePort_(0), connection_(0) { + } + + DnsServer(const char* host, const uint32_t port, const char* sourceAddress, const uint32_t sourcePort) + : host_(host), port_(port), sourceAddress_(sourceAddress), sourcePort_(sourcePort), connection_(0) { + } + + DnsServer& operator = (const DnsServer& right) { + SPARROW_ENTER("DnsServer::operator ="); + const bool started = (connection_ != 0); + if (started) { + DnsServerConnection::release(connection_); + connection_ = 0; + } + host_ = right.host_; + port_ = right.port_; + sourceAddress_ = right.sourceAddress_; + sourcePort_ = right.sourcePort_; + if (started) { + try { + start(); + } catch(const SparrowException&) { + // Ignore error. 
+ connection_ = 0; + } + } + return *this; + } + + bool operator == (const DnsServer& right) const { + return host_ == right.host_ && port_ == right.port_ + && sourceAddress_ == right.sourceAddress_ && sourcePort_ == right.sourcePort_; + } + + bool operator < (const DnsServer& right) const { + int cmp = host_.compareTo(right.host_, false); + if (cmp < 0) { + return true; + } else if (cmp > 0) { + return false; + } + if (port_ < right.port_) { + return true; + } else if (port_ > right.port_) { + return false; + } + cmp = sourceAddress_.compareTo(right.sourceAddress_, false); + if (cmp < 0) { + return true; + } else if (cmp > 0) { + return false; + } + if (sourcePort_ < right.sourcePort_) { + return true; + } else if (sourcePort_ > right.sourcePort_) { + return false; + } + return false; + } + + DnsServer(const DnsServer& right) : connection_(0) { + *this = right; + } + + ~DnsServer() { + if (connection_ != 0) { + DnsServerConnection::release(connection_); + } + } + + const char* getHost() const { + return host_.c_str(); + } + + uint32_t getPort() const { + return port_; + } + + const char* getSourceAddress() const { + return sourceAddress_.c_str(); + } + + uint32_t getSourcePort() const { + return sourcePort_; + } + + // Initiate the connection with this DNS server. + void start() _THROW_(SparrowException) { + if (connection_ == 0) { + connection_ = DnsServerConnection::acquire(*this); + } + } + + DnsServerConnection& getConnection() { + return *connection_; + } + + Str print() const { + char buffer[1024]; + snprintf(buffer, sizeof(buffer), "host=%s, port=%u, sourceAddress=%s, sourcePort=%u", getHost(), getPort(), getSourceAddress(), getSourcePort()); + return Str(buffer); + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const DnsServer& dns) { + buffer << dns.host_ << dns.port_ << dns.sourceAddress_ << dns.sourcePort_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, DnsServer& dns) { + buffer >> dns.host_ >> dns.port_ >> dns.sourceAddress_ >> dns.sourcePort_; + return buffer; +} + +typedef SYSsortedVector DnsServers; + +} + +#endif /* #ifndef _dns_dnsserver_h_ */ diff --git a/storage/sparrow/engine/alter.cc b/storage/sparrow/engine/alter.cc new file mode 100644 index 000000000000..11e12e811d7f --- /dev/null +++ b/storage/sparrow/engine/alter.cc @@ -0,0 +1,210 @@ +/* + Online table modifications. +*/ + +#include "alter.h" +#include "internalapi.h" +#include "fileutil.h" +#include "purge.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory AlterWorker::factory_("AlterWorker"); +JobThreadPool* AlterWorker::threadPool_ = 0; +Lock AlterWorker::lock_(true, "AlterWorker::lock_"); + +// STATIC +void AlterWorker::initialize() _THROW_(SparrowException) { + Guard guard(lock_); + if (threadPool_ == 0) { + // The queue is not bulk because we want jobs to be distributed across workers. 
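+    // The pool is created only once, under lock_; its size appears to be driven by the
+    // sparrow_max_alter_threads parameter passed to the JobThreadPool constructor.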
+ threadPool_ = new JobThreadPool(AlterWorker::factory_, &sparrow_max_alter_threads, + &SparrowStatus::get().alterThreads_, "AlterWorker::Queue", false); + } +} + +// STATIC +void AlterWorker::shutdown() { + Guard guard(lock_); + if (threadPool_ != 0) { + threadPool_->stop(); + } +} + +// STATIC +void AlterWorker::sendJob(Job* job) { + threadPool_->send(job); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MainAlterTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void MainAlterTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("MainAlterTask::run"); + partition_->alter(this); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void AlterTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("AlterTask::run"); + try { + const AlterationType type = alteration_.getType(); + if (type == ALT_ADD_INDEX) { + *stats_ += partition_->createIndex(alteration_.getId(), this); + } else if (type == ALT_DROP_INDEX) { + *stats_ += partition_->dropIndex(alteration_.getId()); + } + } catch(const SparrowException& e) { + e.toLog(); + } + if (Atomic::dec32(counter_) == 0 && !isStopping()) { + PersistentPartitionGuard next = partition_->alterationDone(*stats_, newIndexAlterSerial_); + delete counter_; + delete stats_; + if (next.get() != 0) { + Scheduler::addTask(new MainAlterTask(next.get())); + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Alteration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Str Alteration::getDescription(const Master& master) const { + PrintBuffer buffer; + switch (type_) { + case ALT_ADD_INDEX: // Fall through. + case ALT_DROP_INDEX: { + const Indexes& indexes = master.getIndexes(); + const Index& index = indexes[id_]; + const ColumnIds& columnIds = index.getColumnIds(); + buffer << (type_ == ALT_ADD_INDEX ? "Adding " : "Dropping ") << (index.isUnique() ? 
"unique " : "") + << "index " << index.getName() << "("; + const Columns& columns = master.getColumns(); + for (uint32_t i = 0; i < columnIds.length(); ++i) { + if (i > 0) { + buffer << ", "; + } + buffer << columns[columnIds[i]].getName(); + } + buffer << ")"; + break; + } + default: break; + } + return Str(reinterpret_cast(buffer.getData()), static_cast(buffer.position())); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +int AlterComparator::compare(const uint32_t row1, const uint32_t row2, const bool sortByRow) const { + if (task_ != 0 && (comparisons_++ % 16384) == 0 && task_->isStopping()) { + return 0; + } + const uint64_t offset1 = reader1_.seekRecordData(row1); + const uint64_t offset2 = reader2_.seekRecordData(row2); + uint8_t bitArray1[SPARROW_MAX_BIT_SIZE]; + recordWrapper_.readBits(reader1_, bitArray1); + uint8_t bitArray2[SPARROW_MAX_BIT_SIZE]; + recordWrapper_.readBits(reader2_, bitArray2); + uint8_t buffer1[MAX_KEY_LENGTH]; + uint8_t buffer2[MAX_KEY_LENGTH]; + const uint64_t start1 = offset1 + recordWrapper_.getBitSize(); + const uint64_t start2 = offset2 + recordWrapper_.getBitSize(); + const uint32_t nbInfos = infos_.length(); + for (uint32_t i = 0; i < nbInfos; ++i) { + const ColumnInfo& info = infos_[i]; + const FieldBase& field = *fields_[info.getId()]; + uint8_t bits1 = 0; + uint8_t bits2 = 0; + uint32_t bitOffset = info.getBitOffset(); + const uint32_t nbits = info.getNBits(); + for (uint32_t b = 0; b < nbits; ++b, ++bitOffset) { + bits1 |= ((bitArray1[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + bits2 |= ((bitArray2[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + const uint32_t offset = info.getOffset(); + reader1_.seek(start1 + offset); + uint8_t* p1 = buffer1; + field.readPersistent(reader1_, stringReader1_, bits1, p1, true); + const bool null1 = field.isNullable() && *p1++ != 0; + reader2_.seek(start2 + offset); + uint8_t* p2 = buffer2; + field.readPersistent(reader2_, stringReader2_, bits2, p2, true); + const bool null2 = field.isNullable() && *p2++ != 0; + if (null1) { // NULLs are the smallest values. + if (!null2) { + return -1; + } + } else if (null2) { + return 1; + } else { + const int cmp = field.compare(p1, p2); + if (cmp != 0) { + return cmp; + } + } + } + if (sortByRow) { + // Sort by row: in case of identical values, we get a better locality. + return row1 > row2 ? 1 : (row1 < row2 ? -1 : 0); + } else { + return 0; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void AlterWriter::writeRecord(ByteBuffer& buffer, const uint32_t row) _THROW_(SparrowException) { + const uint64_t offset = reader_.seekRecord(row); + uint8_t bitArray[SPARROW_MAX_BIT_SIZE]; + recordWrapper_.readBits(reader_, bitArray); + + // Write bits to output buffer. 
+ uint8_t* bitValues = bitArray_.data(); + memset(bitValues, 0, bitArray_.length()); + uint32_t n = 0; + for (uint32_t i = 0; i < infos_.length(); ++i) { + const ColumnInfo& info = infos_[i]; + uint32_t bitOffset = info.getBitOffset(); + const uint32_t nbits = info.getNBits(); + for (uint32_t b = 0; b < nbits; ++b, ++bitOffset, ++n) { + if (bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) { + bitValues[n / 8] |= (1 << (n % 8)); + } + } + } + buffer << ByteBuffer(bitValues, bitArray_.length()); + + // Write field values. + const uint64_t start = offset + recordWrapper_.getBitSize(); + BinBuffer* binBuffer = partition_.getVersion() >= PersistentPartition::appendVersion_ ? 0 : &binBuffer_; + for (uint32_t i = 0; i < infos_.length(); ++i) { + const ColumnInfo& info = infos_[i]; + uint8_t bits = 0; + uint32_t bitOffset = info.getBitOffset(); + const uint32_t nbits = info.getNBits(); + for (uint32_t b = 0; b < nbits; ++b, ++bitOffset) { + bits |= ((bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + const uint32_t offset = info.getOffset(); + reader_.seek(start + offset); + fields_[info.getId()]->copy(reader_, stringReader_, bits, buffer, binBuffer); + } +} + +} + diff --git a/storage/sparrow/engine/alter.h b/storage/sparrow/engine/alter.h new file mode 100644 index 000000000000..629b0577dc12 --- /dev/null +++ b/storage/sparrow/engine/alter.h @@ -0,0 +1,175 @@ +/* + Online table modifications. +*/ + +#ifndef _engine_alter_h_ +#define _engine_alter_h_ + +#include "transient.h" +#include "persistent.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class AlterWorker { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + static Lock lock_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); + static Queue& getQueue() { + return *threadPool_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MainAlterTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MainAlterTask : public MasterTask { +private: + + PersistentPartitionGuard partition_; + +public: + + MainAlterTask(PersistentPartition* partition) + : MasterTask(AlterWorker::getQueue(), &partition->getMaster()), partition_(partition) { + } + + virtual bool operator == (const MainAlterTask& right) const { + return this == &right; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class AlterTask : public MasterTask { +private: + + uint32_t* counter_; + AlterationStats* stats_; + const uint32_t newIndexAlterSerial_; + PersistentPartitionGuard partition_; + const Alteration alteration_; + +public: + + AlterTask(uint32_t* counter, AlterationStats* stats, const uint32_t newIndexAlterSerial, PersistentPartition* partition, const Alteration& alteration) + : MasterTask(AlterWorker::getQueue(), 
&partition->getMaster()), counter_(counter), stats_(stats), newIndexAlterSerial_(newIndexAlterSerial), + partition_(partition), alteration_(alteration) { + } + + virtual bool operator == (const AlterTask& right) const { + return this == &right; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class AlterComparator : private DataFileReader { +private: + + const Task* task_; // Interruptible alter task. + const uint64_t recordSize_; // Size of data records. + PartitionReader& reader1_; // First reader on data file. + PartitionReader& stringReader1_; // First reader on string file. + PartitionReader& reader2_; // Second reader on data file. + PartitionReader& stringReader2_; // Second reader on string file. + mutable uint32_t comparisons_; // To check if task is interrupted. + +public: + + AlterComparator(const Task* task, const TableFields& fields, const ColumnIds& columnIds, const ColumnIds& skippedColumns, + PartitionReader& reader1, PartitionReader& stringReader1, PartitionReader& reader2, PartitionReader& stringReader2) + : DataFileReader(fields, columnIds, skippedColumns), task_(task), recordSize_(reader1.getHeader().getRecordSize()), + reader1_(reader1), stringReader1_(stringReader1), reader2_(reader2), stringReader2_(stringReader2), comparisons_(0) { + } + + int compare(const uint32_t row1, const uint32_t row2, const bool sortByRow) const; + + int compare(const uint32_t row1, const uint32_t row2) const { + return compare(row1, row2, true); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AlterWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class AlterWriter : private DataFileReader { +private: + + const PersistentPartition& partition_; + const uint64_t recordSize_; // Size of data (input) records. + PartitionReader& reader_; // Reader on data file. + PartitionReader& stringReader_; // Reader on strings. + uint32_t size_; // Size of written records. + uint32_t bits_; // Number of bits in written records. 
+ BinBuffer binBuffer_; + BitArray bitArray_; + +public: + + AlterWriter(const PersistentPartition& partition, const TableFields& fields, const ColumnIds& columnIds, const ColumnIds& skippedColumns, + PartitionReader& reader, PartitionReader& stringReader) + : DataFileReader(fields, columnIds, skippedColumns), partition_(partition), recordSize_(reader.getHeader().getRecordSize()), + reader_(reader), stringReader_(stringReader) { + bits_ = 0; + size_ = 0; + for (uint32_t i = 0; i < infos_.length(); ++i) { + bits_ += infos_[i].getNBits(); + size_ += infos_[i].getSize(); + } + const uint32_t bitSize = (bits_ + 7) / 8; + bitArray_ = BitArray(bitSize); + size_ += bitSize; + } + + uint32_t getSize() const { + return size_; + } + + const BinBuffer& getBinBuffer() const { + return binBuffer_; + } + + void writeRecord(ByteBuffer& buffer, const uint32_t row) _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _engine_alter_h_ */ diff --git a/storage/sparrow/engine/atomic.h b/storage/sparrow/engine/atomic.h new file mode 100644 index 000000000000..2a915b949d9b --- /dev/null +++ b/storage/sparrow/engine/atomic.h @@ -0,0 +1,190 @@ +/* + Simple atomic operations. +*/ + +#ifndef _engine_atomic_h_ +#define _engine_atomic_h_ + +#ifdef _WIN32 +#include +#include +#if (MSVC_VER >= 1500) +// This one can be intrinsinc only with Visual Studio 2008. +#pragma intrinsic(_InterlockedAdd) +#endif +#pragma intrinsic(_InterlockedExchangeAdd) +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) +#pragma intrinsic(_InterlockedCompareExchange) +#elif defined(__SunOS) +#include +#endif +#include + +namespace Sparrow { + +class Atomic { +private: + + Atomic(); + +public: + +#ifdef _WIN64 + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return _InterlockedExchangeAdd(reinterpret_cast(target), delta); + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + return _InterlockedExchangeAdd64(reinterpret_cast(target), delta); + } + static uint32_t inc32(volatile uint32_t* target) { + return _InterlockedIncrement(reinterpret_cast(target)); + } + static uint32_t dec32(volatile uint32_t* target) { + return _InterlockedDecrement(reinterpret_cast(target)); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return _InterlockedCompareExchange(reinterpret_cast(target), newval, cmp) == cmp; + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return _InterlockedCompareExchange64(reinterpret_cast(target), newval, cmp) == cmp; + } +#elif defined(_WIN32) + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return _InterlockedExchangeAdd(reinterpret_cast(target), delta) + delta; + } + // 64-bit interlocked functions for 32-bit platforms are available only in Vista, + // so use assembly code. 
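+  // The routine below issues a lock cmpxchg8b: the previous value of *target is left in edx:eax,
+  // which is also how 32-bit MSVC returns 64-bit values, hence the absence of an explicit return.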
+ static uint64_t interlockedCompareExchange64(volatile uint64_t* target, const uint64_t value, const uint64_t comp){ + __asm { + mov esi, [target] + mov ebx, dword ptr [value] + mov ecx, dword ptr [value + 4] + mov eax, dword ptr [comp] + mov edx, dword ptr [comp + 4] + lock cmpxchg8b [esi] + } + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return interlockedCompareExchange64(target, newval, cmp) == cmp; + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + uint64_t old; + do { + old = *target; + } while (!cas64(target, old, old + delta)); + return old + delta; + } + static uint32_t inc32(volatile uint32_t* target) { + return _InterlockedIncrement(reinterpret_cast(target)); + } + static uint32_t dec32(volatile uint32_t* target) { + return _InterlockedDecrement(reinterpret_cast(target)); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return _InterlockedCompareExchange(reinterpret_cast(target), newval, cmp) == cmp; + } +#elif defined(__SunOS) // Solaris. + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return atomic_add_32_nv(target, delta); + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + return atomic_add_64_nv((volatile uint64_t*)target, delta); + } + static uint32_t inc32(volatile uint32_t* target) { + return atomic_inc_32_nv(target); + } + static uint32_t dec32(volatile uint32_t* target) { + return atomic_dec_32_nv(target); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return atomic_cas_32(target, cmp, newval) == cmp; + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return atomic_cas_64(reinterpret_cast(target), cmp, newval) == cmp; + } +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) // gcc or Intel Compiler. + static uint32_t add32(volatile uint32_t* target, const int32_t delta) { + return __sync_add_and_fetch(target, delta); + } + static uint64_t add64(volatile uint64_t* target, const int64_t delta) { + return __sync_add_and_fetch(target, delta); + } + static uint32_t inc32(volatile uint32_t* target) { + return __sync_add_and_fetch(target, 1); + } + static uint32_t dec32(volatile uint32_t* target) { + return __sync_sub_and_fetch(target, 1); + } + static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) { + return __sync_bool_compare_and_swap(target, cmp, newval); + } + static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) { + return __sync_bool_compare_and_swap(target, cmp, newval); + } +#elif defined(__x86_64__) // x64 support. 
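+  // Inline-assembly fallback for x86-64 builds where the GCC atomic builtins are not available.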
+  static uint32_t add32(volatile uint32_t* target, const int32_t delta) {
+    uint32_t result=0;
+    asm volatile ("lock; xaddl %0, %1"
+      : "=r"(result), "=m"(*target)
+      : "0"(delta), "m"(*target)
+      : "memory", "cc");
+    return result + delta;
+  }
+
+  static uint64_t add64(volatile uint64_t* target, const int64_t delta) {
+    uint64_t temp = static_cast<uint64_t>(delta);
+    asm volatile("lock; xaddq %0,%1"
+      : "+r" (temp), "+m" (*target)
+      : : "memory");
+    return temp + delta;
+  }
+  static uint32_t inc32(volatile uint32_t* target) {
+    return add32(target, 1);
+  }
+  static uint32_t dec32(volatile uint32_t* target) {
+    return add32(target, -1);
+  }
+  static bool cas32(volatile uint32_t* target, const uint32_t cmp, const uint32_t newval) {
+    uint32_t result;
+    asm volatile ("lock; cmpxchgl %1, %2"
+      : "=a" (result)
+      : "r" (newval), "m" (*target), "0" (cmp)
+      : "memory"); // cmpxchg leaves the previous value of *target in eax.
+    return result == cmp;
+  }
+  static bool cas64(volatile uint64_t* target, const uint64_t cmp, const uint64_t newval) {
+    uint64_t result;
+    asm volatile("lock; cmpxchgq %1,%2"
+      : "=a" (result)
+      : "q" (newval), "m" (*target), "0" (cmp)
+      : "memory"); // The swap succeeded if the previous value equals cmp.
+    return result == cmp;
+  }
+#else
+#error Missing atomic functions
+#endif
+  static void set64(volatile uint64_t* target, const uint64_t newVal) {
+    uint64_t old;
+    do {
+      old = *target;
+    } while (!cas64(target, old, newVal));
+  }
+  static uint64_t get64(volatile uint64_t* target) {
+    uint64_t result;
+    do {
+      result = *target;
+    } while (!cas64(target, result, result));
+    return result;
+  }
+  static uint64_t inc64(volatile uint64_t* target) {
+    return add64(target, 1);
+  }
+  static uint64_t dec64(volatile uint64_t* target) {
+    return add64(target, -1);
+  }
+};
+
+}
+
+#endif /* #ifndef _engine_atomic_h_ */
diff --git a/storage/sparrow/engine/binbuffer.cc b/storage/sparrow/engine/binbuffer.cc
new file mode 100644
index 000000000000..1cb9e0597545
--- /dev/null
+++ b/storage/sparrow/engine/binbuffer.cc
@@ -0,0 +1,96 @@
+/*
+  Buffer for binary strings.
+  May be stored on disk when becoming too large to be kept in memory.
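The set64()/get64() helpers rely on a compare-and-swap retry loop, and the same pattern extends to other read-modify-write operations. A minimal sketch built only on the Atomic primitives above; the helper name is hypothetical and not part of the engine:

// Sketch only: atomically raise *target to at least 'candidate', using the
// same CAS retry loop as Atomic::set64().
static uint64_t atomicMax64(volatile uint64_t* target, const uint64_t candidate) {
  uint64_t old;
  do {
    old = Sparrow::Atomic::get64(target);   // torn-read safe on 32-bit platforms
    if (old >= candidate) {
      return old;                           // already large enough, nothing to do
    }
  } while (!Sparrow::Atomic::cas64(target, old, candidate));
  return candidate;                         // we installed the new maximum
}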
+*/ + +#include "binbuffer.h" +#include "fileutil.h" + +#include "../engine/log.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BinString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void BinString::read(FileReader& reader, ByteBuffer& buffer) { + length_ = 0; + for (uint32_t offset = 0; ; offset += 7) { + uint8_t v; + reader >> v; + length_ |= (v & 0x7f) << offset; + if ((v & 0x80) == 0) { + break; + } + } + offset_ = reader.getFileOffset(); + // Ensure there is at least length_ bytes available in buffer + buffer.advance(length_); + data_ = buffer.getCurrentData() - length_; + ByteBuffer tmp(data_, static_cast(length_)); + reader >> tmp; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BinBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void BinBuffer::optimize(FileReader& reader, const uint64_t size) { + GrowingByteBuffer buffer; + const uint64_t limit = reader.getFileOffset() + size; + while (reader.getFileOffset() < limit) { + buffer.position(0); + BinString s; + s.read(reader, buffer); + BinString* found = hash_.find(s); + if (found != 0) { + found->setOffset(s.getOffset()); + } + } +} + +uint64_t BinBuffer::flush(FileWriter& writer) { + SYShashIterator > iterator(hash_); + const uint64_t save = writer.getFileOffset(); + while (++iterator) { + BinString& s = iterator.key(); + if (!s.isSmall() && s.getOffset() == TRANSIENT) { + const uint64_t offset = writer.getFileOffset(); + const uint32_t length = s.write(writer); + s.setOffset(offset + length); + } + } + return writer.getFileOffset() - save; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// GrowingByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint32_t GrowingByteBuffer::initialSize_ = 1024; + +// Increase buffer size (X2). +void GrowingByteBuffer::overflow() _THROW_(SparrowException) { + const uint8_t* oldData = data_; + const bool initialized = limit_ != 0; + const uint64_t newLimit = initialized ? limit_ * 2 : GrowingByteBuffer::initialSize_; + uint8_t* newData = new uint8_t[newLimit]; + if (newData == 0) { + spw_print_error("GrowingByteBuffer::overflow: cannot allocate %llu bytes of memory", static_cast(newLimit)); + } + if (initialized) { + memcpy(newData, oldData, limit_); + } + limit_ = newLimit; + data_ = newData; + if (initialized) { + // Virtual method to notify derived classes of the buffer relocation. For example, if a derived class + // held pointers into this buffer, it needs to relocate them. + extended(oldData, newData); + delete [] oldData; + } +} + +} + diff --git a/storage/sparrow/engine/binbuffer.h b/storage/sparrow/engine/binbuffer.h new file mode 100644 index 000000000000..ba3d5f56eb1f --- /dev/null +++ b/storage/sparrow/engine/binbuffer.h @@ -0,0 +1,238 @@ +/* + Buffer for binary strings. + May be stored on disk when becoming too large to be kept in memory. 
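BinString::read() decodes the string length as a variable-length integer: 7 bits per byte, least-significant group first, with the high bit marking a continuation byte (BinString::write() in binbuffer.h emits the same form). A standalone sketch of that encoding, independent of FileReader and ByteBuffer:

#include <cstdint>
#include <vector>

// Sketch only: encode a length using the 7-bits-per-byte scheme described above.
static std::vector<uint8_t> encodeLength(uint32_t length) {
  std::vector<uint8_t> out;
  for (;;) {
    const uint8_t v = length & 0x7f;
    length >>= 7;
    out.push_back(length != 0 ? static_cast<uint8_t>(v | 0x80) : v);
    if (length == 0) break;
  }
  return out;
}

// Decode it back, mirroring BinString::read().
static uint32_t decodeLength(const std::vector<uint8_t>& in) {
  uint32_t length = 0;
  uint32_t offset = 0;
  for (size_t i = 0; i < in.size(); ++i, offset += 7) {
    length |= static_cast<uint32_t>(in[i] & 0x7f) << offset;
    if ((in[i] & 0x80) == 0) break;
  }
  return length;
}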
+*/ + +#ifndef _engine_binbuffer_h_ +#define _engine_binbuffer_h_ + +#include "fileutil.h" +#include "types.h" +#include "serial.h" +#include "list.h" +#include "hash.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// GrowingByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Byte buffer that can grow in memory. +class GrowingByteBuffer : public ByteBuffer, public ByteBufferOverflow { +private: + + static const uint32_t initialSize_; + +public: + + GrowingByteBuffer() : ByteBuffer(0, 0, this) { + } + + // Clears this buffer. + void clear() { + position(0); + limit(0); + delete [] data_; + data_ = 0; + } + + ~GrowingByteBuffer() { + clear(); + } + + void overflow() override _THROW_(SparrowException); + + bool end() const override { + return position() >= limit(); + } + + virtual void extended(const uint8_t* oldData, const uint8_t* newData) { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BinString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define TRANSIENT ULLONG_MAX + +/* References a string. The object does not own the string: it is not allocated or freed here. + If it is used to reference a string from a PERSISTENT partition, offset_ gives the offset into the + SPS file of the start of the string and length_ gives the string length. data_ is unused. + If it is used to reference a string from a TRANSIENT partition, data_ points to the start of the string + and length_ gives the string length. offset_ is unused. +*/ + +class BinString { +private: + + const uint8_t* data_; + uint32_t length_; + uint64_t offset_; + +public: + + BinString() : data_(0), length_(0), offset_(TRANSIENT) { + } + + BinString(const uint8_t* data, const uint32_t length) : data_(data), length_(length), offset_(TRANSIENT) { + } + + uint64_t getPosition(const uint8_t* base) const { + return offset_ == TRANSIENT ? static_cast(data_ - base) : offset_; + } + + const uint8_t* getData() const { + return data_; + } + + uint32_t getLength() const { + return length_; + } + + bool isSmall() const { + return length_ <= 16; + } + + uint64_t getOffset() const { + return offset_; + } + + void setOffset(const uint64_t offset) { + offset_ = offset; + } + + // In case the string moves in memory. + void rebase(const uint8_t* oldBase, const uint8_t* newBase) { + const uint64_t position = getPosition(oldBase); + data_ = newBase + position; + } + + // Compare from right to left. + bool operator == (const BinString& right) const { + if (this == &right) { + return true; + } + if (length_ == right.length_) { + uint32_t i = length_; + while (i-- > 0) { + if (data_[i] != right.data_[i]) { + return false; + } + } + return true; + } + return false; + } + + // Java-like hash code. + uint32_t hash() const { + uint32_t h = 1; + for (uint32_t i = 0; i < length_; ++i) { + h = 31 * h + data_[i]; + } + return h; + } + + void read(FileReader& reader, ByteBuffer& buffer); + + uint32_t write(ByteBuffer& buffer) const; +}; + +typedef SYSxvector BinStrings; + +inline uint32_t BinString::write(ByteBuffer& buffer) const { + uint32_t length = getLength(); + uint32_t count = 0; + for (;;) { + uint8_t v = length & 0x7f; + length >>= 7; + const bool end = length == 0; + buffer << static_cast(end ? 
v : (v | 0x80)); + ++count; + if (end) { + break; + } + } + buffer << ByteBuffer(getData(), static_cast(getLength())); + return count; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BinBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* Buffer containing unique byte strings. The strings are all stored, one after the other, in the + GrowingByteBuffer. The SYShash hash_ allows for a quick lookup of the strings the buffer contains. + Each BinString object references a string: + . If the buffer comes from a TRANSIENT partition, the BinString objects contain a pointer into this buffer + of the start of each string and the string length. + . If the buffer was read from a PERSISTENT partition, the BinString objects contain the offset into the file + of the start of each string and the string length. +*/ +class FileWriter; +class BinBuffer : public GrowingByteBuffer { + friend ByteBuffer& operator << (ByteBuffer& buffer, const BinBuffer& binBuffer); + +private: + + // Hash of byte strings. + SYShash > hash_; + +public: + + BinBuffer() : hash_(65536) { + } + + void clear() { + hash_.clear(); + GrowingByteBuffer::clear(); + } + + void extended(const uint8_t* oldData, const uint8_t* newData) override { + // Resize has changed buffer location: need to update all pointers in hash. + SYShashIterator > iterator(hash_); + while (++iterator) { + iterator.key().rebase(oldData, newData); + } + } + + bool end() const override { + return false; + } + + // Inserts a new byte string. + BinString* insert(const uint8_t* data, const uint32_t length) { + if (data_ == 0) { // Initial size. + limit_ = 1024; + data_ = new uint8_t[limit_]; + } + const BinString key(data, length); + // If it's not already referenced in our buffer, add it and add a reference to it into hash_. + BinString* binString = hash_.find(key); + if (binString == 0) { + *this << ByteBuffer(data, length); + binString = hash_.insertAndReturn(BinString(data_ + pos_ - length, length)); + } + return binString; + } + + void optimize(FileReader& reader, const uint64_t size); + + uint64_t flush(FileWriter& writer); + + // Gets the memory used by this buffer. + int64_t getSize() const { + return limit() + hash_.getSize(); + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const BinBuffer& binBuffer) { + buffer << ByteBuffer(binBuffer.getData(), binBuffer.position()); + return buffer; +} + +} + +#endif /* #ifndef _engine_binbuffer_h_ */ diff --git a/storage/sparrow/engine/cache.cc b/storage/sparrow/engine/cache.cc new file mode 100644 index 000000000000..bca1d718e3eb --- /dev/null +++ b/storage/sparrow/engine/cache.cc @@ -0,0 +1,537 @@ +/* + Cache. 
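BinBuffer::insert() stores each distinct byte string once and hands back a pointer to the shared BinString; inserting the same bytes again only performs a hash lookup. A small usage sketch, assuming a default-constructed BinBuffer:

#include <cassert>
#include <cstdint>

// Sketch only: duplicate payloads share a single stored BinString.
void binBufferDedupExample() {
  Sparrow::BinBuffer buffer;
  const uint8_t payload[] = { 'a', 'b', 'c' };
  Sparrow::BinString* first = buffer.insert(payload, sizeof(payload));
  Sparrow::BinString* second = buffer.insert(payload, sizeof(payload));
  assert(first == second);                        // second insert found the existing entry
  assert(first->getLength() == sizeof(payload));  // only one copy of the bytes is kept
}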
+*/ + +#include "cache.h" +#include "io.h" +#include "fileutil.h" +#include "master.h" +#include "persistent.h" +#include "purge.h" + +namespace Sparrow { + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +FileCache* FileCache::cache_ = 0; +PSI_file_key FileCache::dataKey_; +PSI_file_key FileCache::indexKey_; +PSI_file_key FileCache::stringKey_; +PSI_file_key FileCache::miscKey_; +PSI_file_info FileCache::psiInfo_[] = { + { &FileCache::dataKey_, "data", 0, PSI_VOLATILITY_UNKNOWN, PSI_DOCUMENT_ME}, + { &FileCache::indexKey_, "index", 0, PSI_VOLATILITY_UNKNOWN, PSI_DOCUMENT_ME}, + { &FileCache::stringKey_, "string", 0, PSI_VOLATILITY_UNKNOWN, PSI_DOCUMENT_ME}, + { &FileCache::miscKey_, "misc", 0, PSI_VOLATILITY_UNKNOWN, PSI_DOCUMENT_ME} +}; + +// STATIC +void FileCache::initialize() _THROW_(SparrowException) { + cache_ = new FileCache(sparrow_open_files); +} + +FileCache::FileCache(uint32_t entries) : Cache("FileCache", &entries, 0, + SparrowStatus::get().fileCacheAcquires_, SparrowStatus::get().fileCacheReleases_, + SparrowStatus::get().fileCacheMisses_, SparrowStatus::get().fileCacheHits_, SparrowStatus::get().fileCacheSlowHits_) { + SPARROW_ENTER("FileCache::FileCache"); +#ifdef HAVE_PSI_INTERFACE + mysql_file_register("sparrow", FileCache::psiInfo_, array_elements(FileCache::psiInfo_)); +#endif + DBUG_PRINT("sparrow_memory", ("Creating file cache with %u entries", entries)); +} + +// STATIC +void FileCache::releaseFile(const FileId& id, const bool remove) { + FileCacheEntry* entry = FileCache::get().acquire(0, id, 0, false, false); + if (entry != 0) { + FileCache::get().release(entry, 0, true, false); + } + if (remove) { + // Delete file. + int err = my_delete(id.getName(), MYF(0)); + if ( err != 0 && my_errno() != ENOENT ) { + char errMsg[MYSYS_STRERROR_SIZE]; + my_strerror(errMsg, sizeof(errMsg), my_errno()); + spw_print_information("Failed to delete %s: error code %d (%s)",id.getName(), my_errno(), errMsg); + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BlockCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +BlockCache* BlockCache::cache_ = 0; + +// Using large chunks reduces memory "fragmentation" on Solaris, which adds "guard" pages around +// mmap'ed areas. 8MB - 64K is the size used by IVServer, set experimentally. 
+uint32_t BlockCache::chunkSize_ = 8 * 1024 * 1024 - 64 * 1024; + +BlockCache::BlockCache(uint32_t* entries, BlockCacheEntry** cacheEntries) _THROW_(SparrowException) + : Cache("BlockCache", entries, cacheEntries, + SparrowStatus::get().blockCacheAcquires_, SparrowStatus::get().blockCacheReleases_, + SparrowStatus::get().blockCacheMisses_, SparrowStatus::get().blockCacheHits_, SparrowStatus::get().blockCacheSlowHits_) { + SPARROW_ENTER("BlockCache::BlockCache"); + DBUG_PRINT("sparrow_memory", ("Block size is %u bytes", sparrow_cache_block_size)); + DBUG_PRINT("sparrow_memory", ("Creating level 0 block cache with %u entries (%llu MB)", entries[0], (static_cast(entries[0]) * sparrow_cache_block_size) / 1024 / 1024)); + DBUG_PRINT("sparrow_memory", ("Creating level 1 block cache with %u entries (%llu MB)", entries[1], (static_cast(entries[1]) * sparrow_cache_block_size) / 1024 / 1024)); + DBUG_PRINT("sparrow_memory", ("Creating level 2 block cache with %u entries (%llu MB)", entries[2], (static_cast(entries[2]) * sparrow_cache_block_size) / 1024 / 1024)); + DBUG_PRINT("sparrow_memory", ("Creating level 3 block cache with %u entries (%llu MB)", entries[3], (static_cast(entries[3]) * sparrow_cache_block_size) / 1024 / 1024)); + DBUG_PRINT("sparrow_memory", ("Total size of block cache is %llu MB", ((static_cast(entries[0]) + static_cast(entries[1]) + + static_cast(entries[2]) + static_cast(entries[3])) * sparrow_cache_block_size) / 1024 / 1024)); + + LvlCacheStat stat_per_lvl[4]; + stat_per_lvl[0].hits_ = &SparrowStatus::get().blockCacheLvl0Hits_; + stat_per_lvl[1].hits_ = &SparrowStatus::get().blockCacheLvl1Hits_; + stat_per_lvl[2].hits_ = &SparrowStatus::get().blockCacheLvl2Hits_; + stat_per_lvl[3].hits_ = &SparrowStatus::get().blockCacheLvl3Hits_; + stat_per_lvl[0].slowHits_ = &SparrowStatus::get().blockCacheLvl0SlowHits_; + stat_per_lvl[1].slowHits_ = &SparrowStatus::get().blockCacheLvl1SlowHits_; + stat_per_lvl[2].slowHits_ = &SparrowStatus::get().blockCacheLvl2SlowHits_; + stat_per_lvl[3].slowHits_ = &SparrowStatus::get().blockCacheLvl3SlowHits_; + stat_per_lvl[0].misses_ = &SparrowStatus::get().blockCacheLvl0Misses_; + stat_per_lvl[1].misses_ = &SparrowStatus::get().blockCacheLvl1Misses_; + stat_per_lvl[2].misses_ = &SparrowStatus::get().blockCacheLvl2Misses_; + stat_per_lvl[3].misses_ = &SparrowStatus::get().blockCacheLvl3Misses_; + init_lvl_stat( stat_per_lvl ); +} + +// Initialize cache. Memory is allocated once and for all. +// STATIC +void BlockCache::initialize() _THROW_(SparrowException) { + // Check cache block size is a multiple of sector size. + if (FileUtil::adjustSizeToSectorSize(sparrow_cache_block_size) != sparrow_cache_block_size) { + throw SparrowException::create(false, "sparrow_cache_block_size (%u bytes) is not a multiple of the sector size (%u bytes)", sparrow_cache_block_size, FileUtil::getSectorSize()); + } + +#ifdef _WIN32 + // Check cache block size is a multiple of page size. This is necessary on Windows only because + // of limitations of ReadFileScatter(). + if (sparrow_cache_block_size % FileUtil::getPageSize() != 0) { + throw SparrowException::create(false, "sparrow_cache_block_size (%u bytes) is not a multiple of the page size (%u bytes)", sparrow_cache_block_size, FileUtil::getPageSize()); + } +#endif + + // Check write block size is multiple of cache block size. 
+ if (sparrow_write_block_size % sparrow_cache_block_size != 0) { + throw SparrowException::create(false, "sparrow_write_block_size (%u bytes) is not a multiple of sparrow_cache_block_size (%u bytes)", sparrow_write_block_size, sparrow_cache_block_size); + } + + // Check read block sizes are a multiple of cache block size. + if (sparrow_small_read_block_size < sparrow_cache_block_size || + (sparrow_small_read_block_size % sparrow_cache_block_size) != 0) { + throw SparrowException::create(false, "sparrow_small_read_block_size (%u bytes) is not a multiple of sparrow_cache_block_size (%u bytes)", sparrow_small_read_block_size, sparrow_cache_block_size); + } + if (sparrow_medium_read_block_size < sparrow_cache_block_size || + (sparrow_medium_read_block_size % sparrow_cache_block_size) != 0) { + throw SparrowException::create(false, "sparrow_medium_read_block_size (%u bytes) is not a multiple of sparrow_cache_block_size (%u bytes)", sparrow_medium_read_block_size, sparrow_cache_block_size); + } + if (sparrow_large_read_block_size < sparrow_cache_block_size || + (sparrow_large_read_block_size % sparrow_cache_block_size) != 0) { + throw SparrowException::create(false, "sparrow_large_read_block_size (%u bytes) is not a multiple of sparrow_cache_block_size (%u bytes)", sparrow_large_read_block_size, sparrow_cache_block_size); + } + + // Check small < medium < large read block sizes. + if (sparrow_small_read_block_size >= sparrow_medium_read_block_size) { + throw SparrowException::create(false, "sparrow_small_read_block_size (%u bytes) is larger than sparrow_medium_read_block_size (%u bytes)", sparrow_small_read_block_size, sparrow_medium_read_block_size); + } + if (sparrow_medium_read_block_size >= sparrow_large_read_block_size) { + throw SparrowException::create(false, "sparrow_medium_read_block_size (%u bytes) is larger than sparrow_large_read_block_size (%u bytes)", sparrow_medium_read_block_size, sparrow_large_read_block_size); + } + + // Check size of cache 0 is greater than read block sizes. + if (sparrow_cache0_size < sparrow_large_read_block_size) { + throw SparrowException::create(false, "sparrow_cache0_size (%llu bytes) is smaller than sparrow_large_read_block_size (%u bytes)", static_cast(sparrow_cache0_size), sparrow_large_read_block_size); + } + + // Check size of caches 1..3 is greater than cache block size. + if (sparrow_cache1_size < sparrow_cache_block_size) { + throw SparrowException::create(false, "sparrow_cache1_size (%llu bytes) is smaller than sparrow_cache_block_size (%u bytes)", static_cast(sparrow_cache1_size), sparrow_cache_block_size); + } + if (sparrow_cache2_size < sparrow_cache_block_size) { + throw SparrowException::create(false, "sparrow_cache2_size (%llu bytes) is smaller than sparrow_cache_block_size (%u bytes)", static_cast(sparrow_cache2_size), sparrow_cache_block_size); + } + if (sparrow_cache3_size < sparrow_cache_block_size) { + throw SparrowException::create(false, "sparrow_cache3_size (%llu bytes) is smaller than sparrow_cache_block_size (%u bytes)", static_cast(sparrow_cache3_size), sparrow_cache_block_size); + } + + // Initialize memory locking. + ByteBuffer::initialize(); + + // Initialize IO. + IO::initialize(); + + // Max number of blocks locked by partition readers. 
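Each of these checks reduces to "is this size a multiple of that unit". The rounding that a helper such as FileUtil::adjustSizeToSectorSize() presumably performs can be sketched as:

// Sketch only: round 'size' up to the next multiple of 'unit' (unit > 0).
// A configured value passes the checks above exactly when rounding leaves it unchanged.
static uint64_t roundUpToMultiple(const uint64_t size, const uint64_t unit) {
  return ((size + unit - 1) / unit) * unit;
}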
+ uint64_t minCacheSize = std::min(sparrow_cache0_size, sparrow_cache1_size); + minCacheSize = std::min(minCacheSize, sparrow_cache2_size); + minCacheSize = std::min(minCacheSize, sparrow_cache3_size); + ReferencedBlocks::maxLockedBlocks_ = static_cast(minCacheSize / 2 / sparrow_cache_block_size); + + // Round chunk size to the nearest multiple of sparrow_cache_block_size. + chunkSize_ = (chunkSize_ / sparrow_cache_block_size) * sparrow_cache_block_size; + if (chunkSize_ == 0) { + chunkSize_ = sparrow_cache_block_size; + } + uint32_t entriesPerChunk = chunkSize_ / sparrow_cache_block_size; + + // Allocate entries for all cache levels. + uint32_t entries[4]; + entries[0] = static_cast(sparrow_cache0_size / sparrow_cache_block_size); + entries[1] = static_cast(sparrow_cache1_size / sparrow_cache_block_size); + entries[2] = static_cast(sparrow_cache2_size / sparrow_cache_block_size); + entries[3] = static_cast(sparrow_cache3_size / sparrow_cache_block_size); + uint32_t totalEntries = entries[0] + entries[1] + entries[2] + entries[3]; + BlockCacheEntry* cacheEntries[4]; + memset(cacheEntries, 0, sizeof(cacheEntries)); + uint32_t i = 0; + uint64_t allocated = 0; + uint32_t index = 0; + uint32_t level = 0; + while (i < totalEntries) { + uint32_t chunkEntries = std::min(totalEntries - i, entriesPerChunk); + uint32_t size = chunkEntries * sparrow_cache_block_size; + uint8_t* buffer = ByteBuffer::mmap(size); + if (buffer == 0) { + throw SparrowException::create(true, "Cache initialization: cannot allocate %uKB (already allocated %uMB)", + size / 1024, static_cast(allocated / (1024ULL * 1024))); + } + for (uint32_t offset = 0; offset < size; offset += sparrow_cache_block_size) { + BlockCacheEntry* entry = new BlockCacheEntry(level, FileOffset(), FileBlock(buffer + offset)); + if (cacheEntries[level] != 0) { + cacheEntries[level]->prev_ = entry; + entry->next_ = cacheEntries[level]; + } + cacheEntries[level] = entry; + if (++index == entries[level]) { + level++; + index = 0; + } + } + allocated += size; + i += chunkEntries; + } + + // Build cache. + cache_ = new BlockCache(entries, cacheEntries); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionFile +////////////////////////////////////////////////////////////////////////////////////////////////////// + +PartitionFile::PartitionFile(const PersistentPartition& partition, const uint32_t fileId) + : id_(partition.getMaster().getId()), fileId_(fileId), + serial_(fileId == DATA_FILE || fileId == STRING_FILE ? partition.getDataSerial() : partition.getSerial()) { +} + +const char* PartitionFile::getFileName(char* name) const _THROW_(SparrowException) { + MasterGuard master = MasterId::get(id_); + if (master == 0) { + throw SparrowException::create(false, "PartitionFile: cannot find master with id %u", id_); + } + PartitionGuard partition = master->getPartition(serial_); + if (partition == 0) { + throw SparrowException::create(false, "PartitionFile: cannot find persistent partition %s.%s.%llu", + master->getDatabase().c_str(), master->getTable().c_str(), static_cast(serial_)); + } + return static_cast(partition.get())->getFileName(getFileId(), name); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileHandle +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Opens file. 
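The allocation loop above carves each cache level out of large mmap'ed chunks, one BlockCacheEntry per cache block. With assumed (not default) parameter values, the arithmetic works out as follows:

// Sketch only; the configuration values below are assumptions for illustration.
const uint32_t blockSize = 64 * 1024;                     // sparrow_cache_block_size
const uint64_t cache0Size = 512ULL * 1024 * 1024;         // sparrow_cache0_size
uint32_t chunkSize = 8 * 1024 * 1024 - 64 * 1024;         // BlockCache::chunkSize_
chunkSize = (chunkSize / blockSize) * blockSize;          // rounded down to a block multiple (unchanged here)
const uint32_t entriesPerChunk = chunkSize / blockSize;   // 127 entries per mmap'ed chunk
const uint32_t entries0 = static_cast<uint32_t>(cache0Size / blockSize);              // 8192 level-0 entries
const uint32_t chunksForLevel0 = (entries0 + entriesPerChunk - 1) / entriesPerChunk;  // 65 chunks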
+void FileHandle::initialize(const FileId& id, SYSpVector, 64>& entries, const int hint) _THROW_(SparrowException) { + SPARROW_ENTER("FileHandle::initialize"); +#ifndef _WIN32 + if (lock_ == 0) { + const char* name = id.getName(); + const unsigned int max_len = 64; + if (strlen(name) > max_len) name += strlen(name) - max_len; + lock_ = new Lock(false, (Str("FileHandle::lock_(") + Str(name) + Str(")")).c_str()); + } +#endif + const FileType type = id.getType(); + switch (type) { + case FILE_TYPE_DATA: key_ = FileCache::dataKey_; break; + case FILE_TYPE_INDEX: key_ = FileCache::indexKey_; break; + case FILE_TYPE_STRING: key_ = FileCache::stringKey_; break; + case FILE_TYPE_MISC: key_ = FileCache::miscKey_; break; + default: assert(0); + } + name_ = id.getName(); + const FileMode mode = id.getMode(); + if (mode == FILE_MODE_CREATE) { + IO_STAT_CREATE(this); + file_ = IO::open(name_, mode); + } else { + IO_STAT_OPEN(this); + file_ = IO::open(name_, mode); + } + Atomic::inc64(&SparrowStatus::get().ioOpens_); +} + +// Closes file. +void FileHandle::clear() { + if (file_ != -1) { + { + IO_STAT_CLOSE(this); + IO::close(file_); + } + file_ = -1; + Atomic::inc64(&SparrowStatus::get().ioCloses_); + } +} + +// Seeks and reads from file (atomically). +// Useful when multiple threads are likely to read from a file. +uint32_t FileHandle::read(const uint64_t offset, uint8_t* data, const uint32_t size) const _THROW_(SparrowException) { + IO_STAT_OTHER(this, PSI_FILE_READ, size); + const uint32_t readBytes = IO::read(file_, name_, offset, data, size); + IO_STAT_BYTES(readBytes); + Atomic::add64(&SparrowStatus::get().ioReadBytes_, readBytes); + Atomic::inc64(&SparrowStatus::get().ioReads_); + return readBytes; +} + +uint32_t FileHandle::readMultiple(const uint64_t offset, uint8_t** data, const uint32_t size) const _THROW_(SparrowException) { + IO_STAT_OTHER(this, PSI_FILE_READ, size); +#ifndef _WIN32 + const uint32_t readBytes = IO::readMultiple(file_, lock_, name_, offset, data, size); +#else + const uint32_t readBytes = IO::readMultiple(file_, 0, name_, offset, data, size); +#endif + IO_STAT_BYTES(readBytes); + Atomic::add64(&SparrowStatus::get().ioReadBytes_, readBytes); + Atomic::inc64(&SparrowStatus::get().ioReads_); + return readBytes; +} + +// Writes to file. +uint32_t FileHandle::write(const uint64_t offset, uint8_t* data, const uint32_t size) const _THROW_(SparrowException) { + IO_STAT_OTHER(this, PSI_FILE_WRITE, size); + const uint32_t writtenBytes = IO::write(file_, name_, offset, data, size); + IO_STAT_BYTES(writtenBytes); + Atomic::inc64(&SparrowStatus::get().ioWrites_); + + // Trigger purge if necessary. + const uint64_t count = Atomic::add64(&SparrowStatus::get().ioWrittenBytes_, writtenBytes); + const uint64_t n = Purge::getSecurityMargin() / 2; + if (count / n != (count - writtenBytes) / n) { + Purge::wakeUp(); + } + return writtenBytes; +} + +// Get file size. +uint64_t FileHandle::getSize() const _THROW_(SparrowException) { + if (file_ == -1) { + return 0; + } else { + IO_STAT_OTHER(this, PSI_FILE_STAT, 0); + return FileUtil::getFileSize(file_); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileOffset +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +// Initializes a file block from a given file and offset. 
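The purge trigger at the end of write() fires whenever the running total of written bytes crosses a multiple of n. A worked example with assumed numbers:

// Sketch only: the "crossed a multiple of n" test used above, with assumed values.
const uint64_t n = 100;                   // Purge::getSecurityMargin() / 2
const uint64_t before = 180;              // total written bytes before this write
const uint64_t written = 30;              // bytes written by this call
const uint64_t count = before + written;  // 210, the value add64() returns
const bool wakePurge = (count / n) != ((count - written) / n);  // 2 != 1, so the purge thread is woken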
+void FileOffset::expandId(const FileOffset& id, const BlockCacheHint& hint, FileOffset*& ids, uint32_t& n) _THROW_(SparrowException) { + // Create the key, and set its name. + const uint32_t fileId = id.getFileId(); + FileId key(fileId == DATA_FILE ? FILE_TYPE_DATA : (fileId == STRING_FILE ? FILE_TYPE_STRING : FILE_TYPE_INDEX)); + id.getFileName(key.getName()); + + // Get file from file cache. + FileCacheGuard guard(FileCache::get(), 0, key, 0, false); + const FileHandle& handle = guard.get()->getValue(); + + // Get file size and compute start offset and read size. + const uint64_t fileSize = handle.getSize(); + const uint64_t offset = id.getOffset(); + if (offset >= fileSize) { + throw SparrowException::create(false, "Cannot read file %s at offset %llu: offset is greater than file size (%llu)", + key.getName(), static_cast(offset), static_cast(fileSize)); + } + const uint32_t readBlockSize = hint.getReadBlockSize(); + uint32_t readSize = 0; + uint64_t ioffset = 0; + switch (hint.getDirection()) { + case BlockCacheHint::FORWARD: { + readSize = static_cast(std::min(fileSize - offset, static_cast(readBlockSize))); + ioffset = offset; + break; + } + case BlockCacheHint::BACKWARD: { + const uint64_t limit = std::min(fileSize, offset + static_cast(sparrow_cache_block_size)); + readSize = static_cast(std::min(limit, static_cast(readBlockSize))); + const uint32_t blocks = (readSize + sparrow_cache_block_size - 1) / sparrow_cache_block_size - 1; + ioffset = offset - blocks * sparrow_cache_block_size; + break; + } + case BlockCacheHint::AROUND: { + uint64_t half = readBlockSize / 2; + const uint64_t modulo = half % sparrow_cache_block_size; + if (modulo != 0) { + half += half - modulo; + } + ioffset = offset > half ? offset - half : 0; + const uint64_t limit = std::min(fileSize, offset + half); + readSize = static_cast(limit - ioffset); + break; + } + default: + assert(0); + } + + // Get block entries for scattered read-ahead or read-backward. + // Some blocks may be already initialized. Exclude current block. + n = (readSize + sparrow_cache_block_size - 1) / sparrow_cache_block_size - 1; + ids = static_cast(IOContext::getTempBuffer3(n * sizeof(FileOffset))); + uint32_t j = 0; + for (uint32_t i = 0; i <= n; ++i, ioffset += sparrow_cache_block_size) { + if (ioffset != offset) { + ids[j++] = FileOffset(id, ioffset); + } + } + assert(j == n); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileBlock +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Initializes a file block from a given file and offset. +void FileBlock::initialize( const FileOffset& id, SYSpVector, 64>& entries, + const BlockCacheHint& hint ) _THROW_(SparrowException) +{ + // Create the key, and set its name. + const uint32_t fileId = id.getFileId(); + FileId key(fileId == DATA_FILE ? FILE_TYPE_DATA : (fileId == STRING_FILE ? FILE_TYPE_STRING : FILE_TYPE_INDEX)); + id.getFileName(key.getName()); + + // Get file from file cache. + FileCacheGuard guard(FileCache::get(), 0, key, 0, false); + const FileHandle& handle = guard.get()->getValue(); + + // Get file size and compute start offset and read size. + const uint64_t fileSize = handle.getSize(); + const uint64_t offset = id.getOffset(); + + uint32_t dataIndex = 0; + for ( ; dataIndexgetId().getOffset() ) + break; + } + + // Exclude leading and trailing entries already filled. Take care of current block. 
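FileOffset::expandId() turns one requested block into a set of neighbouring blocks to prefetch, excluding the block the caller asked for. For the FORWARD case the arithmetic is roughly:

// Sketch only; sizes are assumptions chosen to keep the numbers simple.
const uint32_t blockSize = 64 * 1024;          // sparrow_cache_block_size
const uint32_t readBlockSize = 4 * blockSize;  // hint.getReadBlockSize()
const uint64_t fileSize = 10ULL * 1024 * 1024;
const uint64_t offset = 3ULL * blockSize;      // block the caller actually asked for
const uint64_t readSize = (fileSize - offset < readBlockSize) ? fileSize - offset : readBlockSize;  // 256 KB
const uint32_t extraBlocks = static_cast<uint32_t>((readSize + blockSize - 1) / blockSize) - 1;     // 3 read-ahead blocks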
+ int first = -1; + int last = -1; + for ( uint32_t j=0; jgetValue().getLength() == 0) { + if (first == -1) { + first = j; + } + last = j; + } + } + if (last >= 0) { + // Some entries are not in the cache: read multiple blocks at once. + uint32_t skip = 0; + uint32_t blocks = 0; + switch (hint.getDirection()) { + case BlockCacheHint::FORWARD: + blocks = 2 + last; + break; + case BlockCacheHint::BACKWARD: + blocks = 1 + entries.entries() - first; + skip = first; + break; + case BlockCacheHint::AROUND: + if (dataIndex >= static_cast(first)) { + if (dataIndex > static_cast(last)) { + blocks = dataIndex - first + 1; + } else { + blocks = last - first + 2; + } + skip = first; + } else { + blocks = last - dataIndex + 2; + skip = dataIndex; + } + break; + default: + assert(0); + } + + // Skip leading filled entries by iterating on the list. + uint32_t j = skip; + dataIndex -= skip; + uint8_t** data = static_cast(IOContext::getTempBuffer3(blocks * sizeof(uint8_t*))); + uint64_t startOffset = hint.getDirection() == BlockCacheHint::FORWARD ? offset : 0; + for (uint32_t i = 0; i < blocks; ++i) { + if (i == dataIndex) { + data[i] = data_; + if (i == 0) { + startOffset = offset; + } + } else { + BlockCacheEntry* entry = entries[j]; + const FileBlock& block = entry->getValue(); + if (block.getLength() == 0) { + data[i] = block.getData(); + if (i == 0) { + startOffset = entry->getId().getOffset(); + } + } else { + assert(i != 0); + data[i] = 0; + } + ++j; + } + } + uint32_t readBytes = handle.readMultiple(startOffset, data, blocks * sparrow_cache_block_size); + if (readBytes < FileUtil::getSectorSize()) { + throw SparrowException::create(false, "Cannot read at most %u bytes from file %s at offset %llu; expected at least %u bytes and got %u bytes", + blocks * sparrow_cache_block_size, key.getName(), static_cast(startOffset), FileUtil::getSectorSize(), readBytes); + } + + // Complete initialization of cache entries. + length_ = static_cast(std::min(fileSize - offset, static_cast(sparrow_cache_block_size))); + readBytes -= length_; + for (uint32_t j=skip; jgetValue(); + if (block.getLength() == 0) { + block.length_ = std::min(readBytes, sparrow_cache_block_size); + entry->setValid(true); + readBytes -= block.length_; + } else { + readBytes -= sparrow_cache_block_size; + } + } + } else { + // All other entries, if any, are already in the cache: read just one block at given offset. + length_ = handle.read(offset, data_, sparrow_cache_block_size); + } + assert(length_ > 0); +} + + +void FileBlock::replace(const FileBlock& value) { + length_ = value.length_; + memcpy(data_, value.data_, length_); +} + + +} + diff --git a/storage/sparrow/engine/cache.h b/storage/sparrow/engine/cache.h new file mode 100644 index 000000000000..043bccfc5a5e --- /dev/null +++ b/storage/sparrow/engine/cache.h @@ -0,0 +1,1243 @@ +/* + Cache. +*/ + +#ifndef _engine_cache_h_ +#define _engine_cache_h_ + +#include "../handler/plugin.h" // For configuration parameters. 
+#include "exception.h" +#include "types.h" +#include "hash.h" +#include "list.h" +#include "cond.h" +#include "io.h" + +#include "mysql/psi/mysql_file.h" + +#ifdef _WIN32 +#pragma warning(disable:4355) +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CacheEntry +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class CacheEntry : public SYSidlink > { +private: + + uint32_t references_; + uint32_t valid_:1; + uint32_t level_:31; // LRU level from which this entry was initially taken. + ID id_; + V value_; + +private: + + CacheEntry(const CacheEntry& right); + CacheEntry& operator = (const CacheEntry& right); + +public: + + // Default constructor. + CacheEntry() + : references_(0), valid_(false), level_(0) { + } + + // Search (key) constructor. + CacheEntry(const ID& id) + : references_(0), valid_(false), level_(0), id_(id) { + } + + // Constructor with level and value. + CacheEntry(const uint32_t level, const ID& id, const V& value) + : references_(0), valid_(false), level_(level), id_(id), value_(value) { + } + + bool operator == (const CacheEntry& right) const { + if (this != &right) { + return id_ == right.id_; + } else { + return true; + } + } + + void acquire() { + references_++; + } + + bool release() { + assert(references_ > 0); + return --references_ == 0; + } + + bool isReferenced() const { + return references_ > 0; + } + + void setValid(const bool valid) { + valid_ = valid; + } + + bool isValid() const { + return valid_; + } + + const ID& getId() const { + return id_; + } + + void setId(const ID& id) { + id_ = id; + } + + const V& getValue() const { + return value_; + } + + V& getValue() { + return value_; + } + + uint32_t getLevel() const { + return level_; + } + + void setLevel(const uint32_t newLevel) { + level_ = newLevel; + } + + void initialize(SYSpVector, 64>& entry_vect, const H& hint) _THROW_(SparrowException) { + value_.initialize(id_, entry_vect, hint); + } + + void clear() { + value_.clear(); + } + + uint32_t hash() const { + return id_.hash(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CacheStat +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CacheStat { +public: + + volatile uint64_t& acquires_; // Number of calls to acquire(). + volatile uint64_t& releases_; // Number of calls to release(). + volatile uint64_t& misses_; // Number of cache misses when acquire() is called. An IO operation was required. + volatile uint64_t& hits_; // Number of cache hits when acquire() is called. + volatile uint64_t& slowHits_; // Number of "slow" cache hits when acquire() is called. A "slow" hit occurs + // when the entry is not in the cache (it must be read from disc) or, when it is in the cache, but it is still being initialized by + // another thread. In this case, the caller thread must wait until the entry initialization is + // completed by the other thread. 
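A CacheEntry is handed out and returned through its reference count: acquire() adds a user, release() reports when the last user is gone and the entry may go back to its LRU level. A minimal sketch, assuming the entry is parameterized over key, value and hint types the way the file cache uses them (the template arguments are an assumption):

#include <cassert>

// Sketch only: the reference-counting contract of CacheEntry.
void cacheEntryRefCountExample() {
  Sparrow::CacheEntry<Sparrow::FileId, Sparrow::FileHandle, int> entry;
  entry.acquire();               // first user
  entry.acquire();               // second user
  assert(entry.isReferenced());
  assert(!entry.release());      // still referenced by the second user
  assert(entry.release());       // last reference dropped: the cache may recycle the entry
}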
+ +public: + + CacheStat(volatile uint64_t& acquires, volatile uint64_t& releases, volatile uint64_t& misses, volatile uint64_t& hits, volatile uint64_t& slowHits) + : acquires_(acquires), releases_(releases), misses_(misses), hits_(hits), slowHits_(slowHits) { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// LvlCacheStat +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class LvlCacheStat { +public: + + volatile uint64_t* misses_; // Number of cache misses when acquire() is called. An IO operation was required. + volatile uint64_t* hits_; // Number of cache hits when acquire() is called. + volatile uint64_t* slowHits_; // Number of "slow" cache hits when acquire() is called. A "slow" hit occurs + // when the entry is not in the cache (it must be read from disc) or, when it is in the cache, but it is still being initialized by + // another thread. In this case, the caller thread must wait until the entry initialization is + // completed by the other thread. + +public: + + LvlCacheStat() : misses_(NULL), hits_(NULL), slowHits_(NULL) { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CacheLevel +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class CacheLevel { +private: + + const uint32_t level_; + SYSidlist > lru_; // Unreferenced (i.e. can be reused) entries for this level. + const uint32_t capacity_; // Initial number of entries in this level. + Cond cond_; // Condition variable set when the level is not empty. + volatile uint32_t* fillRatio_; + +private: + + static Str getName(const char* name, const uint32_t level); + +public: + + CacheLevel(const uint32_t level, Lock& lock, SYSidlist >& list); + + CacheEntry* acquire(const bool wait = true); + + void remove(CacheEntry* entry); + + void release(CacheEntry* entry); + + void getExtraEntries(SYSidlist >& list); + + void balance(SYSidlist >& list); +}; + +// STATIC +template inline Str CacheLevel::getName(const char* name, const uint32_t level) { + char tmp[128]; + snprintf(tmp, sizeof(tmp), "%s::CacheLevel%u::cond_", name, level); + return Str(tmp); +} + +template inline CacheLevel::CacheLevel(const uint32_t level, Lock& lock, + SYSidlist >& list) : level_(level), capacity_(list.entries()), cond_(false, lock, CacheLevel::getName(lock.getName(), level).c_str()) { + while (!list.isEmpty()) { + lru_.append(list.removeFirst()); + } + switch (level) { + case 0: fillRatio_ = &SparrowStatus::get().blockCacheLvl0FillRatio_; break; + case 1: fillRatio_ = &SparrowStatus::get().blockCacheLvl1FillRatio_; break; + case 2: fillRatio_ = &SparrowStatus::get().blockCacheLvl2FillRatio_; break; + case 3: fillRatio_ = &SparrowStatus::get().blockCacheLvl3FillRatio_; break; + default: fillRatio_ = NULL; break; + } +} + +template inline CacheEntry* CacheLevel::acquire(const bool wait /* = true */) { + while (lru_.isEmpty()) { + if (wait) { + // If the LRU is empty, we have to wait for an available entry. 
+ cond_.wait(true); + } else { + return 0; + } + } + Atomic::inc32(fillRatio_); + return lru_.removeFirst(); +} + +template inline void CacheLevel::remove(CacheEntry* entry) { + lru_.remove(entry); + Atomic::inc32(fillRatio_); +} + +template inline void CacheLevel::release(CacheEntry* entry) { + lru_.append(entry); + Atomic::dec32(fillRatio_); + if (lru_.entries() == 1) { + // The LRU was empty, unblock waiter if any. + cond_.signal(true); + } +} + +template inline void CacheLevel::getExtraEntries(SYSidlist >& list) { + uint32_t prev_entries = lru_.entries(); + while (lru_.entries() > capacity_) { + list.append(lru_.removeFirst()); + } + Atomic::add32(fillRatio_, prev_entries-lru_.entries()); +} + +// Takes entries from the given list and adds then to this level's entries, up to this level max capacity. +template inline void CacheLevel::balance(SYSidlist >& list) { + int added = 0; + while (!list.isEmpty() && lru_.entries() < capacity_) { + CacheEntry* entry = list.removeFirst(); + entry->setLevel(level_); + lru_.prepend(entry); + ++added; + if (lru_.entries() == 1) { + // The LRU was empty, unblock waiter if any. + cond_.signal(true); + } + } + Atomic::add32(fillRatio_, -added); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Cache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// ID = key type (ex: file ID + offset) +// V = value type (ex: block of data from that file). +// N = number of levels. + +template class Cache : public Lock { +private: + + // Cache hash. + SYSpHash, SYShPoolAllocator*> > hash_; + + // Cache levels. + CacheLevel* levels_[N]; + + // Entries being initialized. + SYSpHash, SYShPoolAllocator*> > initializing_; + + // Condition for initializing entries. + Cond initCond_; + + // Cache statistics. + CacheStat stat_; + LvlCacheStat stat_per_lvl_[N]; + +private: + + static uint32_t getTotalEntries(uint32_t* entries) { + uint32_t totalEntries = 0; + for (uint32_t level = 0; level < N; ++level) { + totalEntries += entries[level]; + } + return totalEntries; + } + + void balance(); + + bool isInitializingNoLock(const ID& id); + bool isInitializingNoLock(const ID* ids, uint32_t n); + bool needsInitializingNoLock(const ID& id); + CacheEntry* acquireNoLock(bool& initialize, const uint32_t level, const ID& id, + const bool create, const bool updateStats); + void acquireMultipleNoLock(const uint32_t level, const ID* ids, const uint32_t n, SYSpVector, 64>& entries); + void releaseNoLock(CacheEntry* entry, const uint32_t newLevel, const bool clear, const bool updateStats); + +public: + + Cache(const char* name, uint32_t* entries, CacheEntry** cacheEntries, + volatile uint64_t& acquires, volatile uint64_t& releases, volatile uint64_t& misses, volatile uint64_t& hits, volatile uint64_t& slowHits); + + // Note there is no destructor; a cache is allocated upon startup and never destroyed. 
+ + CacheEntry* acquire(const uint32_t level, const ID& id, const H& hint, const bool create, const bool updateStats) _THROW_(SparrowException); + void acquireMultiple(const uint32_t level, const ID* ids, const uint32_t n, SYSpVector, 64>& entries); + void release(CacheEntry* entry, const uint32_t newLevel, const bool clear, const bool updateStats); + CacheEntry* releaseAndAcquire(CacheEntry* entry, const uint32_t level, const ID& id, const H& hint) _THROW_(SparrowException); + void initializeEntry(CacheEntry* entry, SYSpVector, 64>& entries, const H& hint) _THROW_(SparrowException); + void init_lvl_stat(const LvlCacheStat* stat_per_lvl); + + void putBack(SYSpVector, 64>& entries); + + void clear(); +}; + +// Creates a new cache for a given number of entries (one number per level). +// If cacheEntries is null, this constructor takes care of filling the level(s) and the hash pool +// with default entries so there is no memory allocation in the future. +// If cacheEntries is not null, the caller will have to provide one list of cache entries for +// each level. +template inline Cache::Cache(const char* name, uint32_t* entries, + CacheEntry** cacheEntries, + volatile uint64_t& acquires, volatile uint64_t& releases, volatile uint64_t& misses, volatile uint64_t& hits, volatile uint64_t& slowHits) + : Lock(false, name), hash_(getTotalEntries(entries)), initializing_(128), + initCond_(false, *this, (Str(name) + Str("::initCond_")).c_str()), stat_(acquires, releases, misses, hits, slowHits) { + for (uint32_t level = 0; level < N; ++level) { + + SYSidlist > list; + if (cacheEntries == 0) { + for (uint32_t i = 0; i < entries[level]; ++i) { + CacheEntry* entry = new CacheEntry(level, ID(), V()); + hash_.insert(entry); // Default entry. + list.append(entry); + } + } else { + CacheEntry* entry = cacheEntries[level]; + while (entry != 0) { + CacheEntry* next = entry->next_; + hash_.insert(entry); // Entry built by the caller. + list.append(entry); + entry = next; + } + } + levels_[level] = new CacheLevel(level, *this, list); + } + + // The pool is filled, as well as the levels: clear the hash. + hash_.clear(); +} + +template inline void Cache::init_lvl_stat(const LvlCacheStat* stat_per_lvl) { + for (uint32_t level = 0; level < N; ++level) { + stat_per_lvl_[level] = stat_per_lvl[level]; + } +} + +template inline void Cache::initializeEntry(CacheEntry* entry, + SYSpVector, 64>& entries, const H& hint) _THROW_(SparrowException) { + try { + entry->clear(); + entry->initialize(entries, hint); + putBack( entries ); + entry->setValid(true); + Guard guard(*this); + initializing_.remove(entry); + hash_.insert(entry); + initCond_.signalAll(true); + } catch(const SparrowException&) { + // Failure; insert it anyway to unblock waiters. + entry->clear(); + entry->setValid(false); + putBack( entries ); + { + Guard guard(*this); + initializing_.remove(entry); + //hash_.insert(entry); // This is weird: storing an invalid entry in the cache. If the reference is 0, there'll be no way to remove it. + initCond_.signalAll(true); + } + + // Release entry before throwing exception. 
+ release(entry, entry->getLevel(), false, true); + throw; + } +} + + +template inline bool Cache::isInitializingNoLock( const ID& id ) +{ + const CacheEntry key(id); + CacheEntry* entry = 0; + entry = initializing_.find(&key); + return ( entry != 0 ); +} + +template inline bool Cache::isInitializingNoLock( const ID* ids, uint32_t n ) +{ + for (uint32_t i = 0; i < n; ++i) { + const ID& id = ids[i]; + if ( isInitializingNoLock( id ) ) { + return true; + } + } + return false; +} + +template inline bool Cache::needsInitializingNoLock( const ID& id ) +{ + const CacheEntry key(id); + CacheEntry* entry = 0; + entry = hash_.find(&key); + return ( entry == 0 ); +} + + +template inline CacheEntry* Cache::acquireNoLock(bool& initialize, const uint32_t level, + const ID& id, const bool create, const bool updateStats) { + CacheEntry* entry = 0; + { + const CacheEntry key(id); + if (updateStats) { + stat_.acquires_++; + } + entry = hash_.find(&key); + if (entry == 0) { + assert( initializing_.find(&key) == 0 ); + + // Return 0 if the caller does not want the entry to be created. + if (!create) { + return 0; + } + + // No entry found; initialize one. + entry = levels_[level]->acquire(); + + // Make sure the entry is not accessible. + hash_.remove(entry); + entry->setId(id); + entry->acquire(); + + // The entry will be initialized below, outside the lock. + initializing_.insert(entry); + initialize = true; + } else { + if (!entry->isReferenced()) { + // If the entry is not referenced, it is in its level. + // Remove it from its level so it cannot be reused. + levels_[entry->getLevel()]->remove(entry); + } + entry->acquire(); + } + } + return entry; +} + +// Acquire multiple entries for the given array of keys. +template inline void Cache::acquireMultipleNoLock(const uint32_t level, + const ID* ids, const uint32_t n, SYSpVector, 64>& entries) +{ + if (n == 0) { + return; + } + + for (uint32_t i = 0; i < n; ++i) + { + const ID& id = ids[i]; + const CacheEntry key(id); + CacheEntry* entry = 0; + entry = hash_.find(&key); + + if (entry == 0) { + assert( isInitializingNoLock( id ) == 0 ); + + // No entry found; take one from the level and initialize it. + entry = levels_[level]->acquire(); + + // Make sure the entry is not accessible. + hash_.remove(entry); + entry->setId(id); + entry->clear(); + entry->setValid(false); + entry->acquire(); + + // The entry will be initialized by the caller, outside the lock. + initializing_.insert(entry); + } else { + if (!entry->isReferenced()) { + // If the entry is not referenced, it is in its level. + // Remove it from its level so it cannot be reused. + levels_[entry->getLevel()]->remove(entry); + } + entry->acquire(); + } + entry->prev_ = NULL; + entry->next_ = NULL; + entries.append(entry); + } +} + + +// Gets or initializes a cache entry for the given key. An exception is thrown +// if the cache entry initialization fails (I/O error, etc). +// In this case, the entry is in the cache, but marked as not valid. +// So the caller should always check entry->isValid() before using the entry. 
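In practice that contract looks like the sketch below, shown for the file cache with a hypothetical file name; a failed initialization propagates as SparrowException after initializeEntry() has already released the entry:

// Sketch only: acquire, validity check, release.
Sparrow::FileId id("/data/sparrow/example.spd", Sparrow::FILE_TYPE_DATA, Sparrow::FILE_MODE_READ);
try {
  Sparrow::FileCacheEntry* entry = Sparrow::FileCache::get().acquire(0, id, 0, true, true);
  if (entry->isValid()) {
    const Sparrow::FileHandle& handle = entry->getValue();
    // ... handle.read(...) / handle.getSize() ...
  }
  Sparrow::FileCache::get().release(entry, entry->getLevel(), false, true);
} catch (const Sparrow::SparrowException&) {
  // Initialization failed (I/O error, ...); no release is needed here.
}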
+template inline CacheEntry* Cache::acquire(const uint32_t level, + const ID& id, const H& hint, const bool create, const bool updateStats) _THROW_(SparrowException) { + bool initialize = false; + CacheEntry* entry = 0; + bool hasWaited = false; + + // Use hint to expand list of ids to acquire + ID* ids = NULL; // contains additional ids deduced from hint + uint32_t n = 0; + ID::expandId( id, hint, ids, n ); + + SYSpVector, 64> additional_entries; + + bool acquired = false; + + do + { + Guard guard(*this); + + // If block is being initialized, wait. + const CacheEntry key(id); + entry = initializing_.find(&key); + if ( entry == 0 ) + { + if ( n == 0 ) { + entry = acquireNoLock( initialize, level, id, create, updateStats ); + acquired = true; + } else { + // If the value needs to be initialized, we need to reserve the optional additional blocks that will be used during initialization + // If it's not possible to reserve all blocks, now, don't reserve anything. + if ( needsInitializingNoLock( id ) ) { + if ( !isInitializingNoLock( ids, n ) ) { + entry = acquireNoLock( initialize, level, id, create, updateStats ); + assert( initialize == true ); + acquireMultipleNoLock( level, ids, n, additional_entries ); + assert( !create || additional_entries.entries() == n ); + acquired = true; + } else { + // Possible deadlock, so don't do anything (wait). + } + } else { + entry = acquireNoLock( initialize, level, id, create, updateStats ); + assert( initialize == false ); + acquired = true; + } + } + } + + if ( !acquired ) { + //spw_print_error("acquire put to wait: Id is initializing."); + initCond_.wait(true); + hasWaited = true; + } + } while ( !acquired ); + + if (updateStats) { + if (initialize) { + stat_.misses_++; + if ( stat_per_lvl_[level].misses_ != NULL ) { + (*stat_per_lvl_[level].misses_)++; + } + } + if (hasWaited) { + stat_.slowHits_++; + if ( stat_per_lvl_[level].slowHits_ != NULL ) { + (*stat_per_lvl_[level].slowHits_)++; + } + } else { + stat_.hits_++; + if ( stat_per_lvl_[level].hits_ != NULL ) { + (*stat_per_lvl_[level].hits_)++; + } + } + } + + if ( initialize ) { + initializeEntry( entry, additional_entries, hint ); + } + + return entry; +} + + +// Acquire multiple entries for the given array of keys. +template inline void Cache::acquireMultiple(const uint32_t level, + const ID* ids, + const uint32_t n, + SYSpVector, 64>& entries) { + if (n == 0) { + return; + } + Guard guard(*this); + for (uint32_t i = 0; i < n; ++i) { + const ID& id = ids[i]; + const CacheEntry key(id); + CacheEntry* entry = 0; + for ( ; ; ) { + entry = hash_.find(&key); + if (entry == 0) { + // Entry not in the cache. Maybe it is being initialized? + entry = initializing_.find(&key); + if (entry == 0) { + // Entry does not exist. + break; + } else { + for ( ; ; ) { + // Wait until entry is initialized and retry. + initCond_.wait(1000, true); + if (initializing_.find(&key) == 0) { + break; + } + } + } + } else { + break; + } + } + if (entry == 0) { + // No entry found; take one from the level and initialize it. + entry = levels_[level]->acquire(); + + // Make sure the entry is not accessible. + hash_.remove(entry); + entry->setId(id); + entry->clear(); + entry->setValid(false); + entry->acquire(); + + // The entry will be initialized by the caller, outside the lock. + initializing_.insert(entry); + } else { + if (!entry->isReferenced()) { + // If the entry is not referenced, it is in its level. + // Remove it from its level so it cannot be reused. 
+ levels_[entry->getLevel()]->remove(entry); + } + entry->acquire(); + } + entry->prev_ = NULL; + entry->next_ = NULL; + entries.append(entry); + } +} + + +// Adjust levels to their initial capacity. +template inline void Cache::balance() { + if (N > 1) { + SYSidlist > list; + for (uint32_t level = 0; level < N; ++level) { + levels_[level]->getExtraEntries(list); + } + if (!list.isEmpty()) { + for (uint32_t level = 0; level < N; ++level) { + levels_[level]->balance(list); + } + assert(list.isEmpty()); + } + } +} + +template inline void Cache::releaseNoLock(CacheEntry* entry, + const uint32_t newLevel, const bool clear, const bool updateStats) { + if (updateStats) { + stat_.releases_++; + } + if (entry->release()) { + if (clear) { + entry->clear(); + hash_.remove(entry); + } + const uint32_t level = entry->getLevel(); + if (level == newLevel) { + levels_[level]->release(entry); + } else { + entry->setLevel(newLevel); + levels_[newLevel]->release(entry); + } + balance(); + } +} + +// Releases a cache entry. If it is no longer referenced, put it in the level. +// If the clear flag is true, in addition to put the entry back into the level, it is +// cleared and removed from the hash table. +// The cache entry is moved to another level by specifying a newLevel different +// from the current entry's level. +template inline void Cache::release(CacheEntry* entry, + const uint32_t newLevel, const bool clear, const bool updateStats) { + Guard guard(*this); + releaseNoLock(entry, newLevel, clear, updateStats); +} + +template inline CacheEntry* Cache::releaseAndAcquire(CacheEntry* entry, + const uint32_t level, const ID& id, const H& hint) _THROW_(SparrowException) { + { + Guard guard(*this); + releaseNoLock(entry, level, false, true); + } + return acquire(level, id, hint, true, true); +} + +// Put back the given entries. +template inline void Cache::putBack(SYSpVector, 64>& entries) { + if (entries.isEmpty()) { + return; + } + Guard guard(*this); + + for ( uint i=0; i* entry = entries[i]; + assert(entry->prev_ == NULL && entry->next_ == NULL ); + const uint32_t level = entry->getLevel(); + if (initializing_.remove(entry) != 0) { + initCond_.signalAll(true); + hash_.insert(entry); + } + if (entry->release()) { + levels_[level]->release(entry); + } + } + entries.clear(); + balance(); +} + +// Clear all entries in cache. 
+template inline void Cache::clear() { + Guard guard(*this); + for (uint32_t level = 0; level < N; ++level) { + CacheEntry* entry; + while ((entry = levels_[level]->acquire(false)) != 0) { + entry->clear(); + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CacheGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class CacheGuard { +private: + + Cache& cache_; + CacheEntry* entry_; + const bool clear_; + +public: + + CacheGuard(Cache& cache, const uint32_t level, const ID& id, const H& hint, const bool clear) _THROW_(SparrowException) + : cache_(cache), entry_(cache.acquire(level, id, hint, true, true)), clear_(clear) { + } + + ~CacheGuard() { + cache_.release(entry_, entry_->getLevel(), clear_, true); + } + + CacheEntry* get() { + return entry_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileId +////////////////////////////////////////////////////////////////////////////////////////////////////// + +enum FileType { + FILE_TYPE_MISC = 0, + FILE_TYPE_DATA = 1, + FILE_TYPE_INDEX = 2, + FILE_TYPE_STRING = 3 +}; + +class FileId { +protected: + + char name_[FN_REFLEN]; + + // File type. + FileType type_; + + // File mode. + // Note: caching handles for files being written/appended does not make sense, except that we do not + // want to exceed the maximum number of opened files, so we rely on the file cache to achieve this. + FileMode mode_; + +public: + + FileId(const FileType type = FILE_TYPE_MISC, const FileMode mode = FILE_MODE_READ) : type_(type), mode_(mode) { + name_[0] = 0; + } + + FileId(const char* name, const FileType type, const FileMode mode) : type_(type), mode_(mode) { + strcpy(name_, name); + } + + bool operator == (const FileId& right) const { + if (this != &right) { + return mode_ == right.mode_ && strcmp(name_, right.name_) == 0; + } else { + return true; + } + } + + const char* getName() const { + return name_; + } + + char* getName() { + return name_; + } + + FileType getType() const { + return type_; + } + + FileMode getMode() const { + return mode_; + } + + static void expandId(const FileId& id, const int& hint, FileId*& ids, uint32_t& n) _THROW_(SparrowException) { + } + + + uint32_t hash() const { + uint32_t result = 31 + static_cast(mode_); + int off = 0; + for ( ; ; ) { + const uint8_t v = static_cast(name_[off++]); + if (v == 0) { + break; + } + result = 31 * result + v; + } + return result; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileHandle +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileHandle { +private: + + const char* name_; + File file_; + PSI_file_key key_; +#ifndef _WIN32 + Lock* lock_; +#endif + +public: + + FileHandle() : file_(-1) +#ifndef _WIN32 + , lock_(0) +#endif + { + } + + FileHandle& operator = (const FileHandle& right) { + file_ = right.file_; + + // No need to copy other attributes. 
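CacheGuard gives the usual RAII shape for the acquire/release pair, and the FileCacheGuard typedef below is what cache.cc uses. A usage sketch with a hypothetical path:

// Sketch only: the guard releases the entry when it goes out of scope.
Sparrow::FileId key("/data/sparrow/example.spi", Sparrow::FILE_TYPE_INDEX, Sparrow::FILE_MODE_READ);
{
  Sparrow::FileCacheGuard guard(Sparrow::FileCache::get(), 0, key, 0, false);
  const Sparrow::FileHandle& handle = guard.get()->getValue();
  const uint64_t size = handle.getSize();
  // ... read from the file while the guard keeps the handle referenced ...
}  // ~CacheGuard() releases the entry back to its level.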
+ return *this; + } + + FileHandle(const FileHandle& right) { +#ifndef _WIN32 + lock_ = 0; +#endif + *this = right; + } + + ~FileHandle() { +#ifndef _WIN32 + delete lock_; +#endif + } + + void initialize(const FileId& id, SYSpVector, 64>& entries, const int hint) _THROW_(SparrowException); + + void clear(); + + uint32_t read(const uint64_t offset, uint8_t* data, const uint32_t size) const _THROW_(SparrowException); + + uint32_t readMultiple(const uint64_t offset, uint8_t** data, const uint32_t size) const _THROW_(SparrowException); + + uint32_t write(const uint64_t offset, uint8_t* data, const uint32_t size) const _THROW_(SparrowException); + + uint64_t getSize() const _THROW_(SparrowException); + + const char* getName() const { + return name_; + } + + File getFile() const { + return file_; + } + + PSI_file_key getKey() const { + return key_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef CacheEntry FileCacheEntry; +typedef CacheGuard FileCacheGuard; + +class FileCache : public Cache { +private: + + static FileCache* cache_; + static PSI_file_info psiInfo_[]; + +public: + + static PSI_file_key dataKey_; + static PSI_file_key indexKey_; + static PSI_file_key stringKey_; + static PSI_file_key miscKey_; + +public: + + FileCache(uint32_t entries); + static void initialize() _THROW_(SparrowException); + static FileCache& get() { + return *cache_; + } + + static void releaseFile(const FileId& id, const bool remove); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionFile +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PersistentPartition; +class PartitionFile { +protected: + + uint32_t id_; // Id of master file. + uint32_t fileId_; // DATA_FILE for data file, or STRING_FILE for string file, or id in array of indexes. + uint64_t serial_; // Partition serial number. 
+ +public: + + PartitionFile() : id_(0), fileId_(0), serial_(0) { + } + + PartitionFile(const PersistentPartition& partition, const uint32_t index); + + PartitionFile(const PartitionFile& right) : id_(right.id_), fileId_(right.fileId_), serial_(right.serial_) { + } + + bool operator == (const PartitionFile& right) const { + if (this != &right) { + return id_ == right.id_ && fileId_ == right.fileId_ && serial_ == right.serial_; + } else { + return true; + } + } + + bool operator != (const PartitionFile& right) const { + return !(*this == right); + } + + uint64_t getSerial() const { + return serial_; + } + + uint32_t getFileId() const { + return fileId_; + } + + const char* getFileName(char* name) const _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ReadCacheHint +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ReadCacheHint { +public: + + virtual const BlockCacheHint& getBlockHint() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// WriteCacheHint +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class WriteCacheHint { +public: + + virtual const PartitionFile& getPartitionFile() const = 0; + + virtual uint32_t getLevel() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SimpleWriteCacheHint +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SimpleWriteCacheHint : public WriteCacheHint { +private: + + const PartitionFile& file_; + const uint32_t level_; + +public: + + SimpleWriteCacheHint(const PartitionFile& file, const uint32_t level) : file_(file), level_(level) { + } + + const PartitionFile& getPartitionFile() const override { + return file_; + } + + uint32_t getLevel() const override { + return level_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileOffset +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileOffset : public PartitionFile { +protected: + + uint64_t offset_; + +public: + + FileOffset() : PartitionFile(), offset_(0) { + } + + FileOffset(const PartitionFile& partitionFile) : PartitionFile(partitionFile), offset_(0) { + } + + FileOffset(const PersistentPartition& partition, const uint32_t fileId, const uint64_t offset) : PartitionFile(partition, fileId), offset_(offset) { + } + + FileOffset(const FileOffset& right, const uint64_t offset) : PartitionFile(right), offset_(offset) { + } + + bool operator == (const FileOffset& right) const { + if (this != &right) { + return offset_ == right.offset_ && static_cast(*this) == right; + } else { + return true; + } + } + + uint64_t getOffset() const { + return offset_; + } + + void setOffset(const uint64_t offset) { + offset_ = offset; + } + + static void expandId(const FileOffset& id, const BlockCacheHint& hint, FileOffset*& ids, uint32_t& n) _THROW_(SparrowException); + + uint32_t hash() const { + uint32_t result = 31 + id_; + result = 31 * result + fileId_; + result = 31 * result + static_cast(serial_ ^ (serial_ >> 32)); + result = 31 * result + static_cast(offset_ ^ (offset_ >> 32)); + return result; + } +}; + 
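+// Usage sketch (illustrative only; assumes a PersistentPartition instance named "partition" is at
+// hand and that DATA_FILE designates its data file):
+//
+//   const PartitionFile file(partition, DATA_FILE);   // identifies one file of the partition
+//   const SimpleWriteCacheHint hint(file, 0);         // write hint targeting cache level 0
+//   FileOffset key(partition, DATA_FILE, 0);          // block cache key = partition file + byte offset
+//   key.setOffset(4096);                              // hypothetical offset of the next cached block
+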
+////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileBlock +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileBlock { +private: + + uint8_t* data_; // Allocated once and for all, points to a block with size == sparrow_cache_block_size + uint32_t length_; // Actual length of read block, <= sparrow_cache_block_size. + +public: + + FileBlock() : data_(0), length_(0) { + } + + FileBlock(uint8_t* data, const uint32_t length = 0) : data_(data), length_(length) { + } + + uint8_t* getData() const { + return data_; + } + + uint32_t getLength() const { + return length_; + } + + void initialize(const FileOffset& id, SYSpVector, 64>& entries, const BlockCacheHint& hint) _THROW_(SparrowException); + void replace(const FileBlock& value); + void clear() { + length_ = 0; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BlockCache +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef CacheEntry BlockCacheEntry; +typedef SYSpVector BlockCacheEntries; +typedef SYSidlistIterator BlockCacheIterator; + +class BlockCache : public Cache { +private: + + static BlockCache* cache_; + static uint32_t chunkSize_; + +public: + + BlockCache(uint32_t* entries, BlockCacheEntry** cacheEntries) _THROW_(SparrowException); + static void initialize() _THROW_(SparrowException); + static BlockCache& get() { + return *cache_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BlockCacheEntriesGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class BlockCacheEntriesGuard { +private: + + BlockCacheEntries entries_; + +public: + + BlockCacheEntriesGuard() { + } + ~BlockCacheEntriesGuard() { + BlockCache::get().putBack(entries_); + } + BlockCacheEntries& get() { + return entries_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileStatGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileStatGuard { +private: + + const FileHandle& handle_; + const PSI_file_operation operation_; + PSI_file_locker_state state_; + struct PSI_file_locker* locker_{nullptr}; + size_t bytes_; + +public: + + FileStatGuard(const FileHandle& handle, const char* file, const uint32_t line, const PSI_file_operation operation, const size_t count) + : handle_(handle), operation_(operation), bytes_(0) { +#ifdef HAVE_PSI_FILE_INTERFACE + if (operation_ == PSI_FILE_OPEN || operation_ == PSI_FILE_CREATE) { + const char* name = handle.getName(); + const size_t l = strlen(name); + name += l > PFS_MAX_INFO_NAME_LENGTH ? 
(l - PFS_MAX_INFO_NAME_LENGTH) : 0; + locker_ = PSI_FILE_CALL(get_thread_file_name_locker)(&state_, handle_.getKey(), operation_, name, &locker_); + } else { + locker_ = PSI_FILE_CALL(get_thread_file_descriptor_locker)(&state_, handle_.getFile(), operation_); + } + if (locker_ != nullptr) { + if (operation_ == PSI_FILE_OPEN || operation_ == PSI_FILE_CREATE) { + PSI_FILE_CALL(start_file_open_wait)(locker_, file, line); + } else { + PSI_FILE_CALL(start_file_wait)(locker_, count, file, line); + } + } +#endif + } + + void setBytes(const size_t bytes) { + bytes_ = bytes; + } + + ~FileStatGuard() { +#ifdef HAVE_PSI_FILE_INTERFACE + if (locker_ != nullptr) { + if (operation_ == PSI_FILE_OPEN || operation_ == PSI_FILE_CREATE) { + PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker_, handle_.getFile()); + } else { + PSI_FILE_CALL(end_file_wait)(locker_, bytes_); + } + } +#endif + } +}; + +#define IO_STAT_CREATE(H) FileStatGuard __ioguard(*H, __FILE__, __LINE__, PSI_FILE_CREATE, 0) +#define IO_STAT_OPEN(H) FileStatGuard __ioguard(*H, __FILE__, __LINE__, PSI_FILE_OPEN, 0) +#define IO_STAT_CLOSE(H) FileStatGuard __ioguard(*H, __FILE__, __LINE__, PSI_FILE_CLOSE, 0) +#define IO_STAT_OTHER(H, O, B) FileStatGuard __ioguard(*H, __FILE__, __LINE__, O, static_cast(B)) +#define IO_STAT_BYTES(B) __ioguard.setBytes(B) + +} + +#endif /* #ifndef _engine_cache_h_ */ + diff --git a/storage/sparrow/engine/coalescing.cc b/storage/sparrow/engine/coalescing.cc new file mode 100644 index 000000000000..75b33cfeb281 --- /dev/null +++ b/storage/sparrow/engine/coalescing.cc @@ -0,0 +1,503 @@ +/* + Partition coalescing. +*/ + +#include "coalescing.h" + +#include "../engine/log.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory CoalescingWorker::factory_("CoalescingWorker"); +JobThreadPool* CoalescingWorker::threadPool_ = 0; +Lock CoalescingWorker::lock_(true, "CoalescingWorker::lock_"); + +// STATIC +void CoalescingWorker::initialize() _THROW_(SparrowException) { + // The queue is not bulk because we want jobs to be distributed across workers. + threadPool_ = new JobThreadPool(CoalescingWorker::factory_, &sparrow_max_coalescing_threads, + &SparrowStatus::get().coalescingThreads_, "CoalescingWorker::Queue", false); +} + +// STATIC +void CoalescingWorker::shutdown() { + if (threadPool_ != 0) { + threadPool_->stop(); + delete threadPool_; + threadPool_ = 0; + } +} + +// STATIC +void CoalescingWorker::sendJob(Job* job) { + threadPool_->send(job); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingMainTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void CoalescingMainTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("CoalescingMainTask::run"); + try { + const PersistentPartitions allPartitions(partitions_); + PersistentPartition* coalescedPartition = Coalescing::generateDataFile(partitions_, *this); + Atomic::inc64(&SparrowStatus::get().coalescingMainTaskProcessed_); + if (isStopping()) { + delete coalescedPartition; + } else { + WriteGuard guard(get()->getLock()); // Because each created CoalescingIndexTask calls Master::registerCoalescingTask() which requires the lock. 
+ Coalescing::triggerIndexCoalescing(get(), allPartitions, partitions_, coalescedPartition, indexIds_); + } + } catch(const SparrowException& e) { + spw_print_error("Failed to coalesce data files for table %s.%s: %s", + get()->getDatabase().c_str(), get()->getTable().c_str(), e.getText()); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingIndexTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void CoalescingIndexTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("CoalescingIndexTask::run"); + try { + if ( Coalescing::generateIndexFile(partitions_, index_, coalescedPartition_, this) == 0 ) { + flags_->setAborted(); + } else { + Atomic::inc64(&SparrowStatus::get().coalescingIndexTaskProcessed_); + } + } catch(const SparrowException& e) { + flags_->setAborted(); + spw_print_error("Failed to coalesce index %u files for table %s.%s: %s", + index_, get()->getDatabase().c_str(), get()->getTable().c_str(), e.getText()); + } + finished(); + run_ = true; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Coalescing +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +void Coalescing::triggerIndexCoalescing(Master* master, const PersistentPartitions& allPartitions, + const PersistentPartitions& partitions, PersistentPartition* coalescedPartition, const IndexIds& indexIds) _THROW_(SparrowException) { + SPARROW_ENTER("Coalescing::triggerIndexCoalescing"); + const uint32_t nbIndexes = indexIds.length(); + if (coalescedPartition->getRecords() > 0 && nbIndexes > 0) { + char name[256]; + snprintf(name, sizeof(name), "CoalescingIndexTask_%llu", static_cast(coalescedPartition->getSerial())); + CoalescingFlags* flags = new CoalescingFlags(name); + for (uint32_t i = 0; i < nbIndexes; ++i) { + Scheduler::addTask(new CoalescingIndexTask(master, allPartitions, partitions, coalescedPartition, indexIds[i], flags)); + } + } else { + master->coalescingDone(coalescedPartition, allPartitions); + } +} + +// Appends the data files of the given persistent partitions and returns the coalesced persistent partition. 
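+// Records are copied partition by partition: for each record the per-record bit array is copied as-is
+// and its per-field bits drive FieldBase::copy(); variable-length data goes into a BinBuffer that is
+// appended after the fixed-size records, and the file header is written last, after padding the file
+// to its final size.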
+// STATIC +PersistentPartition* Coalescing::generateDataFile(PersistentPartitions& partitions, + const Task& task) _THROW_(SparrowException) { + SPARROW_ENTER("Coalescing::generateDataFile"); +#ifndef NDEBUG + const uint64_t tstart = my_micro_time(); + FileHeader debugHeader; +#endif + const uint32_t nbPartitions = partitions.length(); + assert(nbPartitions > 1); + PersistentPartition& firstPartition = *partitions[0]; + assert(firstPartition.getVersion() < PersistentPartition::appendVersion_); + Master& master = firstPartition.getMaster(); + UpdateGuard updateGuard(master); + uint64_t minTimestamp = ULLONG_MAX; + uint64_t maxTimestamp = 0; + uint32_t records = 0; + for (uint32_t i = 0; i < nbPartitions; ++i) { + const PersistentPartition& partition = *partitions[i]; + records += partition.getRecords(); + const TimePeriod period = partition.getPeriod(); + minTimestamp = std::min(minTimestamp, period.getMin()); + maxTimestamp = std::max(maxTimestamp, period.getMax()); + } + TableFieldsGuard fieldsGuard; + TableFields& fields = fieldsGuard.get(); + PersistentPartition* coalescedPartition = 0; + const uint32_t columnAlterSerial = firstPartition.getColumnAlterSerial(); + { + WriteGuard guard(master.getLock()); + master.getFields(columnAlterSerial, true, fields, NULL); + coalescedPartition = master.newPersistentPartition(firstPartition.getVersion(), SAME_AS_SERIAL, FileUtil::chooseFilesystem(false), + TimePeriod(minTimestamp, maxTimestamp), records, firstPartition.getIndexAlterSerial(), columnAlterSerial, records, 0, firstPartition.getSkippedColumns()); + } + const uint32_t nbFields = fields.length(); + uint64_t discardedRecords = 0; + AutoPtr partitionGuard(coalescedPartition); + const SerialRecordWrapper recordWrapper(columnAlterSerial, DATA_FILE, false, fields, 0); + char filename[FN_REFLEN]; + { + const PartitionFile partitionFile(*coalescedPartition, DATA_FILE); + const SimpleWriteCacheHint writeHint(partitionFile, 0); + FileWriter writer(coalescedPartition->getFileName(DATA_FILE, filename), FILE_TYPE_DATA, FILE_MODE_CREATE, &writeHint); + PersistentPartition::writeFileFormat(writer); + + // Write records. + BinBuffer binBuffer; + uint32_t i = 0; + uint8_t bitArray[SPARROW_MAX_BIT_SIZE]; + while (i < partitions.length()) { + const PersistentPartition& partition = *partitions[i]; + bool opened = false; + try { + PartitionReaderGuard readerGuard(partition, DATA_FILE, false, BlockCacheHint::largeForward0_); + PartitionReader& reader = readerGuard.get(); + PartitionReaderGuard stringReaderGuard(partition, DATA_FILE, true, BlockCacheHint::largeAround0_); + PartitionReader& stringReader = stringReaderGuard.get(); + opened = true; + const FileHeaderBase& header = reader.getHeader(); + reader.seekRecord(0); + const uint64_t nbRecords = header.getRecords(); + for (uint64_t j = 0; j < nbRecords; ++j) { + if ((j % 16384) == 0 && task.isStopping()) { + return partitionGuard.release(); + } + recordWrapper.readBits(reader, bitArray); + writer << ByteBuffer(bitArray, recordWrapper.getBitSize()); + int bitOffset = 0; + for (uint32_t k = 0; k < nbFields; ++k) { + const FieldBase* field = fields[k]; + if (field == 0) { + continue; + } + const int nbits = field->getBits(); + uint32_t bits = 0; + for (int b = 0; b < nbits; ++b, ++bitOffset) { + bits |= ((bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 
0 : 1) << b; + } + field->copy(reader, stringReader, bits, writer, &binBuffer); + } + } + ++i; + } catch(const SparrowException& e) { + if (!opened) { + discardedRecords += partition.getRecords(); + partitions.removeAt(i); + } else { + throw e; + } + } + } + + // Remove discarded records. + coalescedPartition->decRecords(static_cast(discardedRecords)); + + // Write binary data. + const uint64_t offset = writer.getFileOffset(); + writer << binBuffer; + const uint64_t binSize = writer.getFileOffset() - offset; + + // Padding to put header at the end of the file, taking into account its adjusted size. + const TimePeriod& period = coalescedPartition->getPeriod(); + const FileHeader header(binSize, 0, false, 0, recordWrapper.getSize(), coalescedPartition->getRecords(), + DATA_FILE, period.getMin(), period.getMax()); +#ifndef NDEBUG + debugHeader = header; +#endif + const uint64_t dataSize = header.getTotalSize(); + coalescedPartition->setDataSize(dataSize); + const uint64_t target = dataSize - FileHeader::size(); + while (writer.getFileOffset() < target) { + writer << static_cast(0); + } + + // Write header. + writer << header; + writer.write(); + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + const Str sizeTotal(Str::fromSize(debugHeader.getTotalSize())); + const Str sizeRecords(Str::fromSize(debugHeader.getRecordSize() * debugHeader.getRecords())); + const Str sizeBin(Str::fromSize(debugHeader.getBinSection().getSize())); + DBUG_PRINT("sparrow_coalescing", ("Written data file of coalesced partition %s.%s.%llu (%u partitions, %s total = %s records + %s bin) in %s", + master.getDatabase().c_str(), master.getTable().c_str(), static_cast(coalescedPartition->getSerial()), nbPartitions, + sizeTotal.c_str(), sizeRecords.c_str(), sizeBin.c_str(), duration.c_str())); +#endif + return partitionGuard.release(); +} + +template class Sort; +template class BinarySearch; + +// Merge the index files of the given partitions for the given index. +// STATIC +uint64_t Coalescing::generateIndexFile(const PersistentPartitions& partitions, const uint32_t index, PersistentPartition* coalescedPartition, + const Task* task) _THROW_(SparrowException) { + SPARROW_ENTER("Coalescing::generateIndexFile"); +#ifndef NDEBUG + const uint64_t tstart = my_micro_time(); + FileHeader debugHeader; +#endif + PersistentPartition& firstPartition = *partitions[0]; + Master& master = firstPartition.getMaster(); + TableFieldsGuard fieldsGuard; + TableFields& fields = fieldsGuard.get(); + ColumnIds columnIds; + const uint32_t columnAlterSerial = firstPartition.getColumnAlterSerial(); + { + WriteGuard guard(master.getLock()); + master.getFields(columnAlterSerial, true, fields, NULL); + columnIds = master.getIndexes()[index].getColumnIds(); + + // If the index files do not exist (because the index references new fields not present when partitions were generated), + // trigger ADD index on coalesced partition instead of coalescing index files. 
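+    // This is done by bumping the index alter serial of every non-transient, non-temporary partition
+    // that is still at the current serial, recording an ALT_ADD_INDEX alteration, and writing the
+    // master back to disk.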
+ if (!firstPartition.isTemporary()) { + for (uint32_t i = 0; i < columnIds.length(); ++i) { + if (fields[columnIds[i]] == 0) { + const uint32_t indexAlterSerial = master.getIndexAlterSerial(); + const Partitions& mpartitions = master.getPartitions(); + for (uint32_t j = 0; j < mpartitions.length(); ++j) { + Partition& p = *mpartitions[j]; + if (!p.isTransient() && !p.isTemporary() && p.getIndexAlterSerial() == indexAlterSerial) { + p.setIndexAlterSerial(indexAlterSerial + 1); + } + } + Alterations alterations = master.getIndexAlterations(); + alterations.append(Alteration(ALT_ADD_INDEX, indexAlterSerial + 1, index)); + master.setIndexAlterations(alterations); + master.setIndexAlterSerial(indexAlterSerial + 1); + master.toDisk(); + return 0; + } + } + } + } + DBUG_PRINT("sparrow_coalescing", ("Start coalescing %u partitions for index %u into partition %s.%s.%llu", partitions.length(), index, + master.getDatabase().c_str(), master.getTable().c_str(), static_cast(coalescedPartition->getSerial()))); + const uint32_t nbPartitions = partitions.length(); + RowOffsets rowOffsets(nbPartitions); + uint32_t offset = 0; + for (uint32_t i = 0; i < nbPartitions; ++i) { + rowOffsets.append(offset); + offset += static_cast(partitions[i]->getDataRecords()); + } + const SerialRecordWrapper recordWrapper(columnAlterSerial, index, false, fields, &columnIds); + const uint32_t recordSize = recordWrapper.getSize() - 4; // TODO adjust row size + const uint32_t nodeSize = recordSize + 2 * 4; // TODO row size + CoalescingReaders readers(partitions, index); + Positions positions(nbPartitions); + KeyIndirector indirector(nbPartitions); + KeyValues keyValues; + keyValues.reshape((nbPartitions + 1) * recordSize); + BinBuffer binBuffer; + const bool isAppend = coalescedPartition->getVersion() >= PersistentPartition::appendVersion_; + BinBuffer* pBinBuffer = isAppend ? 0 : &binBuffer; + + // Move readers to first index position. + offset = 0; + for (uint32_t i = 0; i < nbPartitions; ++i, offset += recordSize) { + if (task != 0 && task->isStopping()) { + return 0; + } + + // Use smallest node in tree (guaranteed to be the smallest index value). + indirector.append(i); + PartitionReaderGuard readerGuard(readers.get(i, 0)); + PartitionReader& reader = readerGuard.get(); + PartitionReaderGuard stringReaderGuard(readers.get(i, 1)); + PartitionReader& stringReader = stringReaderGuard.get(); + const uint32_t node = reader.getHeader().getMinNode(); + reader.seekTree(node); + uint32_t start; + uint32_t end; + reader >> start >> end; + ByteBuffer b(keyValues.data() + offset, recordSize); + recordWrapper.readKeyValue(reader, stringReader, b, pBinBuffer); + reader.seekRecord(start); + uint32_t row; + reader >> row; + positions.append(Position(i, row, start, start, end, node)); + } + + // Sort positions. 
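+  // The indirector is kept ordered by each partition's current key value, so indirector[0] always
+  // designates the partition holding the smallest key: the writing loop below is a k-way merge.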
+ { + const CoalescingComparator comparator(readers, recordWrapper, keyValues, pBinBuffer); + Sort::quickSort(indirector, comparator, 0, nbPartitions); +#ifndef NDEBUG + if (nbPartitions > 1) { + for (uint32_t i = 0; i < nbPartitions - 1; ++i) { + ByteBuffer b1(keyValues.data() + indirector[i] * recordSize, recordSize); + ByteBuffer b2(keyValues.data() + indirector[i + 1] * recordSize, recordSize); + PartitionReaderGuard stringReaderGuard1(readers.get(indirector[i], 1)); + PartitionReaderGuard stringReaderGuard2(readers.get(indirector[i + 1], 2)); + const int cmp = recordWrapper.compare(b1, stringReaderGuard1.get(), b2, stringReaderGuard2.get(), pBinBuffer); + assert(cmp <= 0); + } + } +#endif + } + DBUG_PRINT("sparrow_coalescing", ("Moved readers to first index position, start sorting and writing")); + uint64_t indexSize = 0; + char filename[FN_REFLEN]; + { + FileWriter writer(coalescedPartition->getFileName(index, filename), FILE_TYPE_INDEX, FILE_MODE_CREATE); + if (isAppend) { + IndexFileHeader dummy; + writer << dummy; + } else { + PersistentPartition::writeFileFormat(writer); + } + + // Write records and build tree nodes. + uint32_t currentRow = 0; // TODO row size + uint32_t startRow = 0; + GrowingByteBuffer treeBuffer; // Contains tree nodes in list order. + uint32_t nodes = 0; + uint8_t* cmpKey = keyValues.data() + nbPartitions * recordSize; + uint32_t partition = indirector[0]; + const uint8_t* currentKey = keyValues.data() + partition * recordSize; + memcpy(cmpKey, currentKey, recordSize); + uint32_t cmpPartition = partition; + while (true) { + if (task != 0 && (currentRow % 16384) == 0 && task->isStopping()) { + return 0; + } + Position& position = positions[partition]; + assert(position.getPartition() == partition); + uint32_t row = rowOffsets[partition] + position.getRow(); + writer << row; // TODO row size + const uint32_t indexHint = position.getIndexHint() + 1; + PartitionReaderGuard readerGuard(readers.get(partition, 0)); + PartitionReader& reader = readerGuard.get(); + if (position.hasIntervalHint() && indexHint <= position.getEndHint()) { + // Stay on the same index value: positions order is unchanged. 
+ reader.seekRecord(indexHint); + reader >> row; // TODO row size + position = Position(partition, row, indexHint, position.getStartHint(), position.getEndHint(), position.getTreeHint()); + } else { + PartitionReaderGuard stringReaderGuard(readers.get(partition, 1)); + PartitionReader& stringReader = stringReaderGuard.get(); + const FileHeaderBase& header = reader.getHeader(); + if (indexHint == header.getRecords()) { + position = Position(); + } else if (header.isTreeComplete()) { + const uint32_t node = header.getNextNode(position.getTreeHint()); + reader.seekTree(node); + uint32_t start; + uint32_t end; + reader >> start >> end; // TODO row size + ByteBuffer b(currentKey, recordSize); + recordWrapper.readKeyValue(reader, stringReader, b, pBinBuffer); + reader.seekRecord(start);// TODO row size + reader >> row; // TODO row size + position = Position(partition, row, start, start, end, node); + } else { + reader.seekRecord(indexHint); + reader >> row; // TODO row size + ByteBuffer b(currentKey, recordSize); + recordWrapper.readKeyValue(reader, stringReader, b, pBinBuffer); + position = Position(partition, row, indexHint, INVALID_ROW, INVALID_ROW, INVALID_TREE_NODE); + } + if (position.isValid()) { + CoalescingKeyComparator comparator(readers, recordWrapper, keyValues, indirector, pBinBuffer); + const uint32_t n = indirector.length() - 1; + const uint32_t insertionPoint = BinarySearch::find(comparator, 0, n, SearchFlag::GE); + uint32_t* data = const_cast(indirector.data()); + const uint32_t save = data[0]; + memmove(data, data + 1, sizeof(data[0]) * n); + if (insertionPoint == UINT_MAX || insertionPoint == n) { + data[n] = save; + } else { + memmove(data + insertionPoint + 1, data + insertionPoint, sizeof(data[0]) * (n - insertionPoint)); + data[insertionPoint] = save; + } + } else { + indirector.removeFirst(); + } + if (indirector.isEmpty()) { + treeBuffer << startRow << currentRow << ByteBuffer(cmpKey, recordSize); // TODO row size + ++nodes; + ++currentRow; + break; + } else { + partition = indirector[0]; + currentKey = keyValues.data() + partition * recordSize; + ByteBuffer b1(cmpKey, recordSize); + ByteBuffer b2(currentKey, recordSize); + PartitionReaderGuard stringReaderGuard1(readers.get(cmpPartition, 1)); + PartitionReaderGuard stringReaderGuard2(readers.get(partition, 2)); + const int cmp = recordWrapper.compare(b1, stringReaderGuard1.get(), b2, stringReaderGuard2.get(), pBinBuffer); + assert(cmp <= 0); + if (cmp != 0) { + treeBuffer << startRow << currentRow << ByteBuffer(cmpKey, recordSize); // TODO row size + memcpy(cmpKey, currentKey, recordSize); + cmpPartition = partition; + startRow = currentRow + 1; + ++nodes; + } + } + } + ++currentRow; + } + + // Write tree nodes in tree order. + const TreeOrder& treeOrder = TreeOrder::get(nodes); + for (uint32_t i = 0; i < nodes; ++i) { + const uint32_t node = treeOrder.getListIndex(i, nodes); + writer << ByteBuffer(treeBuffer.getData() + node * nodeSize, nodeSize); + } + + const TimePeriod period = coalescedPartition->getPeriod(); + if (isAppend) { + // Write header. + writer.write(); + IndexFileHeader header(index, 4, currentRow, nodeSize, nodes, period.getMin(), period.getMax()); + writer.seek(0, header.size()); +#ifndef NDEBUG + debugHeader = FileHeader(0, static_cast(treeBuffer.position()), true, nodeSize, 4, currentRow, index, period.getMin(), period.getMax()); +#endif + writer << header; + } else { + // Write binary data. + writer << binBuffer; + + // Padding. 
+ const FileHeader header(static_cast(binBuffer.position()), static_cast(treeBuffer.position()), true, nodeSize, + 4, currentRow, index, period.getMin(), period.getMax()); +#ifndef NDEBUG + debugHeader = header; +#endif + const uint64_t indexSize = header.getTotalSize(); + const uint64_t target = indexSize - FileHeader::size(); + while (writer.getFileOffset() < target) { + writer << static_cast(0); + } + + // Write header. + writer << header; + } + writer.write(); + indexSize = writer.getFileSize(); + if (!firstPartition.isTemporary()) { + coalescedPartition->addIndexSize(indexSize); + } + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + const Str sizeTotal(Str::fromSize(debugHeader.getTotalSize())); + const Str sizeRecords(Str::fromSize(debugHeader.getRecordSize() * debugHeader.getRecords())); + const Str sizeTree(Str::fromSize(debugHeader.getTreeSection().getSize())); + DBUG_PRINT("sparrow_coalescing", ("Written index file %u of coalesced partition %s.%s.%llu (%u partitions, %s total = %s records + %s tree) in %s", + index, master.getDatabase().c_str(), master.getTable().c_str(), static_cast(coalescedPartition->getSerial()), nbPartitions, + sizeTotal.c_str(), sizeRecords.c_str(), sizeTree.c_str(), duration.c_str())); +#endif + return indexSize; +} + +} diff --git a/storage/sparrow/engine/coalescing.h b/storage/sparrow/engine/coalescing.h new file mode 100644 index 000000000000..b7684a76ee44 --- /dev/null +++ b/storage/sparrow/engine/coalescing.h @@ -0,0 +1,343 @@ +/* + Partition coalescing. +*/ + +#ifndef _engine_coalescing_h_ +#define _engine_coalescing_h_ + +#include "persistent.h" +#include "binbuffer.h" +#include "search.h" +#include "sort.h" +#include "sema.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescablePartitions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescablePartitions { +private: + + CoalescingInfo info_; + PersistentPartitions partitions_; + +public: + + CoalescablePartitions(const CoalescingInfo& info) : info_(info) { + } + + const CoalescingInfo& getInfo() const { + return info_; + } + + PersistentPartitions& getPartitions() { + return partitions_; + } + + const PersistentPartitions& getPartitions() const { + return partitions_; + } + + bool operator == (const CoalescablePartitions& right) const { + return info_ == right.info_; + } + + uint32_t hash() const { + uint32_t result = 31 + info_.getFirst().getFirst(); + result = 31 * result + info_.getFirst().getSecond(); + const uint64_t serial = info_.getSecond(); + return 31 + static_cast(serial ^ (serial >> 32)); + return result; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingWorker { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + static Lock lock_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); + static Queue& getQueue() { + return *threadPool_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingFlags +////////////////////////////////////////////////////////////////////////////////////////////////////// 
+ +class CoalescingFlags { +public: + CoalescingFlags(const char* name) : lock_(false, name), counter_(0), aborted_(false) {;} + + void acquire() { + Guard guard(lock_); + counter_++; + } + + bool release() { + Guard guard(lock_); + assert(counter_ > 0); + return (--counter_ == 0); + } + + void setAborted() { + aborted_ = true; + } + + bool isAborted() const { return aborted_; } + +public: + Lock lock_; + uint32_t counter_; + bool aborted_; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingMainTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingMainTask : public MasterTask { +private: + + PersistentPartitions partitions_; + IndexIds indexIds_; + +public: + + CoalescingMainTask(Master* master, const PersistentPartitions& partitions, const IndexIds& indexIds) + : MasterTask(CoalescingWorker::getQueue(), master), partitions_(partitions), indexIds_(indexIds) { + get()->registerCoalescingTask(this); + Atomic::inc32(&SparrowStatus::get().tasksPendingCoalescingMainTasks_); + } + + ~CoalescingMainTask() { + WriteGuard guard(get()->getLock()); + get()->unregisterCoalescingTask(this); + Atomic::dec32(&SparrowStatus::get().tasksPendingCoalescingMainTasks_); + } + + virtual bool operator == (const CoalescingMainTask& right) const { + return this == &right; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingIndexTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingIndexTask : public MasterTask { +private: + + PersistentPartitions allPartitions_; + PersistentPartitions partitions_; + PersistentPartitionGuard coalescedPartition_; + const uint32_t index_; + CoalescingFlags* flags_; + bool run_; + + void finished() { + if (flags_->release()) { + if (flags_->isAborted()) { + get()->coalescingFailed(allPartitions_); + } else { + get()->coalescingDone(coalescedPartition_, allPartitions_); + } + delete flags_; + flags_ = NULL; + } + } + +public: + + CoalescingIndexTask(Master* master, const PersistentPartitions& allPartitions, + const PersistentPartitions& partitions, PersistentPartition* coalescedPartition, const uint32_t index, CoalescingFlags* flags) + : MasterTask(CoalescingWorker::getQueue(), master), allPartitions_(allPartitions), + partitions_(partitions), coalescedPartition_(coalescedPartition), index_(index), flags_(flags), run_(false) { + flags_->acquire(); + get()->registerCoalescingTask(this); // Assumes Master.lock_ is already acquired + Atomic::inc32(&SparrowStatus::get().tasksPendingCoalescingIndexTasks_); + } + + ~CoalescingIndexTask() { + if (!run_) { + assert(isStopping() == true); + flags_->setAborted(); + finished(); + } + WriteGuard guard(get()->getLock()); + get()->unregisterCoalescingTask(this); + Atomic::dec32(&SparrowStatus::get().tasksPendingCoalescingIndexTasks_); + } + + virtual bool operator == (const CoalescingIndexTask& right) const { + return this == &right; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override 
_THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingReaders +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingReaders : private PartitionReadersBase { +private: + + const PersistentPartitions& partitions_; + const uint32_t index_; + const BlockCacheHint& hint_; + +private: + + CoalescingReaders(const CoalescingReaders& right); + CoalescingReaders& operator = (const CoalescingReaders& right); + +public: + + CoalescingReaders(const PersistentPartitions& partitions, const uint32_t index) + : PartitionReadersBase(partitions.length() * 3), partitions_(partitions), index_(index), hint_(BlockCacheHint::largeAround0_) { + for (uint32_t i = 0; i < PartitionReadersBase::capacity(); ++i) { + PartitionReadersBase::append(0); + } + } + + ~CoalescingReaders() { + clearAndDestroy(); + } + + PartitionReader* get(const uint32_t partition, const uint32_t forString) _THROW_(SparrowException) { + assert(forString <= 2); + const uint32_t i = partition * 3 + forString; + PartitionReader* reader = PartitionReadersBase::operator[](i); + if (reader == 0) { + reader = partitions_[partition]->createReader(index_, forString != 0, hint_); + PartitionReadersBase::operator[](i) = reader; + } + return reader; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingKeyComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingKeyComparator { +private: + + CoalescingReaders& readers_; + const RecordWrapper& recordWrapper_; + const uint32_t size_; + const uint8_t* data_; + const KeyIndirector& indirector_; + BinBuffer* binBuffer_; + const uint8_t* reference_; + +public: + + CoalescingKeyComparator(CoalescingReaders& readers, const RecordWrapper& recordWrapper, const KeyValues& keyValues, + const KeyIndirector& indirector, BinBuffer* binBuffer) + : readers_(readers), recordWrapper_(recordWrapper), size_(recordWrapper.getSize() - 4), // TODO adjust row size + data_(keyValues.data()), indirector_(indirector), binBuffer_(binBuffer), reference_(keyValues.data() + indirector[0] * size_) { + } + + int compareTo(const uint32_t row) _THROW_(SparrowException) { + const uint32_t row1 = indirector_[row + 1]; + ByteBuffer buffer1(data_ + row1 * size_, size_); + ByteBuffer buffer2(reference_, size_); + PartitionReaderGuard stringReaderGuard1(readers_.get(row1, 1)); + PartitionReaderGuard stringReaderGuard2(readers_.get(indirector_[0], 2)); + return recordWrapper_.compare(buffer1, stringReaderGuard1.get(), buffer2, stringReaderGuard2.get(), binBuffer_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingComparator { +private: + + CoalescingReaders& readers_; + const RecordWrapper& recordWrapper_; + BinBuffer* binBuffer_; + const uint32_t size_; + const uint8_t* data_; + +public: + + CoalescingComparator(CoalescingReaders& readers, const RecordWrapper& recordWrapper, const KeyValues& keyValues, BinBuffer* binBuffer) + : readers_(readers), recordWrapper_(recordWrapper), binBuffer_(binBuffer), size_(recordWrapper.getSize() - 4), // TODO adjust row size + data_(keyValues.data()) { + } + + int compare(const uint32_t row1, 
const uint32_t row2) const { + ByteBuffer buffer1(data_ + row1 * size_, size_); + ByteBuffer buffer2(data_ + row2 * size_, size_); + PartitionReaderGuard stringReaderGuard1(readers_.get(row1, 1)); + PartitionReaderGuard stringReaderGuard2(readers_.get(row2, 2)); + const int cmp = recordWrapper_.compare(buffer1, stringReaderGuard1.get(), buffer2, stringReaderGuard2.get(), binBuffer_); + if (cmp == 0) { + return row1 > row2 ? 1 : (row1 < row2 ? -1 : 0); + } else { + return cmp; + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Coalescing +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSvector RowOffsets; // Partition row offsets. + +class Coalescing { +public: + + static void initialize() _THROW_(SparrowException); + + static void shutdown(); + + static void triggerIndexCoalescing(Master* master, const PersistentPartitions& allPartitions, + const PersistentPartitions& partitions, PersistentPartition* coalescedPartition, const IndexIds& indexIds) _THROW_(SparrowException); + + static PersistentPartition* generateDataFile(PersistentPartitions& partitions, + const Task& task) _THROW_(SparrowException); + + static uint64_t generateIndexFile(const PersistentPartitions& partitions, const uint32_t index, PersistentPartition* coalescedPartition, + const Task* task) _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _engine_coalescing_h_ */ diff --git a/storage/sparrow/engine/compress.cc b/storage/sparrow/engine/compress.cc new file mode 100644 index 000000000000..791de116ac79 --- /dev/null +++ b/storage/sparrow/engine/compress.cc @@ -0,0 +1,98 @@ +/* + Compression helpers. +*/ + +#include "compress.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// LZJB +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define NBBY 8 +#define MATCH_BITS 6 +#define MATCH_MIN 3 +#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) +#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) +#define LEMPEL_SIZE 1024 + +// STATIC +size_t LZJB::compress(uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len) { + uint8_t* src = s_start; + uint8_t* dst = d_start; + uint8_t* cpy; + uint8_t* copymap = 0; + int copymask = 1 << (NBBY - 1); + int mlen, offset, hash; + uint16_t* hp; + uint16_t lempel[LEMPEL_SIZE] = { 0 }; + while (src < s_start + s_len) { + if ((copymask <<= 1) == (1 << NBBY)) { + if (dst >= d_start + d_len - 1 - 2 * NBBY) { + return s_len; + } + copymask = 1; + copymap = dst; + *dst++ = 0; + } + if (src > s_start + s_len - MATCH_MAX) { + *dst++ = *src++; + continue; + } + hash = (src[0] << 16) + (src[1] << 8) + src[2]; + hash += hash >> 9; + hash += hash >> 5; + hp = &lempel[hash & (LEMPEL_SIZE - 1)]; + offset = (uint64_t)(src - *hp) & OFFSET_MASK; + *hp = static_cast((uint64_t)src); + cpy = src - offset; + if (cpy >= s_start && cpy != src && + src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { + *copymap |= copymask; + for (mlen = MATCH_MIN; mlen < MATCH_MAX; ++mlen) { + if (src[mlen] != cpy[mlen]) { + break; + } + } + *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY); + *dst++ = static_cast(offset); + src += mlen; + } else { + *dst++ = *src++; + } + } + return dst - d_start; +} + +// STATIC +int LZJB::decompress(uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len) { + uint8_t* src 
= s_start; + uint8_t* dst = d_start; + uint8_t* d_end = (uint8_t*)d_start + d_len; + uint8_t* cpy; + uint8_t copymap = 0; + int copymask = 1 << (NBBY - 1); + while (dst < d_end) { + if ((copymask <<= 1) == (1 << NBBY)) { + copymask = 1; + copymap = *src++; + } + if (copymap & copymask) { + int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; + int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; + src += 2; + if ((cpy = dst - offset) < (uint8_t *)d_start) { + return -1; + } + while (--mlen >= 0 && dst < d_end) { + *dst++ = *cpy++; + } + } else { + *dst++ = *src++; + } + } + return 0; +} + +} diff --git a/storage/sparrow/engine/compress.h b/storage/sparrow/engine/compress.h new file mode 100644 index 000000000000..03f92d1ecf43 --- /dev/null +++ b/storage/sparrow/engine/compress.h @@ -0,0 +1,29 @@ +/* + Compression helpers. +*/ + +#ifndef _engine_compress_h_ +#define _engine_compress_h_ + +#include "types.h" +#include "serial.h" +#include "list.h" +#include "hash.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// LZJB +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class LZJB { +public: + + static size_t compress(uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len); + + static int decompress(uint8_t* s_start, uint8_t* d_start, size_t s_len, size_t d_len); +}; + +} + +#endif /* #ifndef _engine_compress_h_ */ diff --git a/storage/sparrow/engine/cond.h b/storage/sparrow/engine/cond.h new file mode 100644 index 000000000000..bc35c96ffd84 --- /dev/null +++ b/storage/sparrow/engine/cond.h @@ -0,0 +1,170 @@ +/* + Condition variable. +*/ + +#ifndef _engine_cond_h_ +#define _engine_cond_h_ + +#include "lock.h" +#include "mysql/psi/mysql_cond.h" + +namespace Sparrow { + +class Cond { +private: + + PSI_cond_key key_; + PSI_cond_info info_; + mysql_cond_t cond_; + Lock* lock_; + uint32_t volatile nbWaiters_; + uint8_t owned_:1; + uint8_t static_:1; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + + void initialize() { +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + Lock::lockPSI(); + mysql_cond_register("sparrow", &info_, 1); + Lock::unlockPSI(); + //} +#endif + mysql_cond_init(key_, &cond_); + } + + void clear() { + if (info_.m_name != 0) { + mysql_cond_destroy(&cond_); + my_free(const_cast(info_.m_name)); + if (owned_) { + delete lock_; + } + info_.m_name = 0; + } + } + + Cond& operator = (const Cond&); + Cond(const Cond&); + +public: + + Cond(const bool isStatic, const char* name) : lock_(new Lock(isStatic, name)), nbWaiters_(0), owned_(true), static_(isStatic) { + info_.m_key = &key_; + //const size_t l = strlen(name); + //name += l > PFS_MAX_INFO_NAME_LENGTH ? (l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; + if (static_) { + Cond::getStatics().append(this); + } else { + initialize(); + } + } + + Cond(const bool isStatic, Lock& lock, const char* name) : lock_(&lock), nbWaiters_(0), owned_(false), static_(isStatic) { + info_.m_key = &key_; + //const size_t l = strlen(name); + //name += l > PFS_MAX_INFO_NAME_LENGTH ? 
(l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; + if (static_) { + Cond::getStatics().append(this); + } else { + initialize(); + } + } + + static void initializeStatics() { + SYSslistIterator iterator(Cond::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(Cond::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~Cond() { + clear(); + } + + Lock& getLock() { + return *lock_; + } + + void acquire() { + lock_->lock(); + } + + void release() { + lock_->unlock(); + } + + void signal(const bool acquired = false) { + if (!acquired) { + acquire(); + } + if (nbWaiters_ > 0) { + mysql_cond_signal(&cond_); + } + if (!acquired) { + release(); + } + } + + void signalAll(const bool acquired = false) { + if (!acquired) { + acquire(); + } + if (nbWaiters_ > 0) { + mysql_cond_broadcast(&cond_); + } + if (!acquired) { + release(); + } + } + + bool wait(const uint64_t milliseconds, const bool acquired = false) { + if (!acquired) { + acquire(); + } + nbWaiters_++; + int status; + if (milliseconds == 0) { + status = mysql_cond_wait(&cond_, lock_->get()); // Infinite wait. + } else { + struct timespec t; + const uint64_t nanoseconds = milliseconds * 1000000; + set_timespec_nsec(&t, nanoseconds); + status = mysql_cond_timedwait(&cond_, lock_->get(), &t); + } + nbWaiters_--; + if (!acquired) { + release(); + } + return (status == 0); + } + + bool wait(const bool acquired = false) { + return wait(0, acquired); + } +}; + +} + +#endif /* #ifndef _engine_cond_h_ */ diff --git a/storage/sparrow/engine/condition.cc b/storage/sparrow/engine/condition.cc new file mode 100644 index 000000000000..f217dc112150 --- /dev/null +++ b/storage/sparrow/engine/condition.cc @@ -0,0 +1,423 @@ +/* + Analyzer for WHERE condition: find timestamp intervals. +*/ + +#define MYSQL_SERVER 1 +#include "sql/table.h" +#include "sql/item.h" +#include "sql/item_func.h" +#include "sql/item_cmpfunc.h" +#include "sql/current_thd.h" +#include "sql/sql_time.h" +#include "sql/sql_class.h" + +#include "../handler/plugin.h" // For configuration parameters. 
+#include "condition.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TimePeriods +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void TimePeriods::makeAnd(const TimePeriods& right) { + TimePeriods result(std::max(capacity(), right.capacity())); + for (uint32_t i = 0; i < right.length(); ++i) { + const TimePeriod& pright = right[i]; + for (uint32_t j = 0; j < length(); ++j) { + const TimePeriod& pleft = (*this)[j]; + result.insert(pleft.makeIntersection(pright)); + } + } + *this = result; + if (isEmpty()) { + insert(TimePeriod()); + } + compact(); +} + +void TimePeriods::makeOr(const TimePeriod& right) { + bool found = false; + for (uint32_t j = 0; j < length(); ++j) { + const TimePeriod& left = (*this)[j]; + if (left.intersects(right) || left.isAdjacent(right)) { + remove(left); + insert(left.makeUnion(right)); + found = true; + break; + } + } + if (!found) { + insert(right); + } + compact(); +} + +void TimePeriods::makeOr(const TimePeriods& right) { + for (uint32_t i = 0; i < right.length(); ++i) { + makeOr(right[i]); + } +} + +void TimePeriods::makeNot() { + TimePeriods list(*this); + clear(); + TimePeriod tmp[2]; + for (uint32_t i = 0; i < list.length(); ++i) { + list[i].makeNot(tmp); + const int n = tmp[1].isVoid() ? 1 : 2; + for (int j = 0; j < n; ++j) { + makeOr(tmp[j]); + } + } +} + +void TimePeriods::compact() { + uint32_t i = 0; + while (i + 1 < length()) { + TimePeriod& p1 = (*this)[i]; + TimePeriod& p2 = (*this)[i + 1]; + if (p1.intersects(p2)) { + p1 = p1.makeUnion(p2); + removeAt(i + 1); + } else { + i++; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Condition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Timestamp pruning: browses pushed condition and returns a list of timestamp intervals. +// If we end up with ]-inf, +inf[, use given lower timestamp to restrict time range. 
+// STATIC +TimePeriods Condition::getPeriods(TABLE* table, Item* cond, const uint64_t lower) { + SPARROW_ENTER("Condition::getPeriods"); + TimePeriods periods; + if (cond != 0) { + periods = Condition::get(table, cond, true); + } + if (periods.isEmpty() || periods.first().isAll()) { + periods.clear(); + if (lower == 0) { + periods.insert(TimePeriod()); + } else { + periods.insert(TimePeriod(&lower, 0, true, false)); + } + } + return periods; +} + +// STATIC +// See Condition::getPeriods() +TimePeriods Condition::get(TABLE* table, Item* item, const bool returnAllIfNone) { + SPARROW_ENTER("Condition::get"); + TimePeriods periods; + switch (item->type()) { + case Item::FIELD_ITEM: + break; + case Item::FUNC_ITEM: + case Item::COND_ITEM: { + Item ** args = NULL; + int tindex = -1; + bool isTimestamp = true; + Item_func* func = static_cast(item); + const uint n = func->argument_count(); + if ( n != 0 ) { + args = func->arguments(); + for (uint i = 0; i < n; ++i) { + const Item* arg = args[i]; + if (arg->type() == Item::FIELD_ITEM) { + const Item_field* field = static_cast(arg); + if (field->field->table == table && field->field->field_index() == 0) { + tindex = i; + break; + } + } else if (arg->type() == Item::FUNC_ITEM) { + const Item_func* test = static_cast(arg); + if (strcmp(test->func_name(), "unix_timestamp") == 0 && test->argument_count() == 1) { + const Item* testArg = test->arguments()[0]; + if (testArg->type() == Item::FIELD_ITEM) { + const Item_field* field = static_cast(testArg); + if (field->field->table == table && field->field->field_index() == 0) { + tindex = i; + isTimestamp = false; + break; + } + } + } + } + } + } + uint64_t bound = (uint64_t)-1; + if (tindex >= 0 && n >= 2) { + bound = getBound(args[tindex == 0 ? 1 : 0], isTimestamp); + } + + // Since MySQL timestamps have no milliseconds, in some cases, we add 999ms to + // the lower or upper bound of the timestamp interval to make sure + // we get all matching records. 
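+      // (For instance, "ts <= '2025-01-01 00:00:01'" should also match rows stored at 00:00:01.500,
+      // so the inclusive upper bound is extended to 00:00:01.999.)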
+ switch (func->functype()) { + case Item_func::EQUAL_FUNC: + case Item_func::EQ_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("= %s", stime.c_str())); +#endif + periods.insert(TimePeriod(bound)); + } + break; + } + case Item_func::MULT_EQUAL_FUNC: { + Item_equal* equal = static_cast(func); + const Item* constItem = equal->const_arg(); + if (constItem != 0) { + const Item_field* field = equal->get_first(); + if (field != 0 && field->field->field_index() == 0) { + bound = getBound(equal->const_arg(), isTimestamp); +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("= %s", stime.c_str())); +#endif + periods.insert(TimePeriod(bound)); + } + } + break; + } + case Item_func::NE_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("= %s", stime.c_str())); +#endif + periods.insert(TimePeriod(bound)); + periods.makeNot(); + } + break; + } + case Item_func::LT_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("< %s", stime.c_str())); +#endif + if ( tindex == 0 ) { + periods.insert(TimePeriod(0, &bound, false, false)); + } else { + periods.insert(TimePeriod(&bound, 0, false, false)); + } + } + break; + } + case Item_func::LE_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("<= %s", stime.c_str())); +#endif + if ( tindex == 0 ) { + periods.insert(TimePeriod(0, &bound, false, true)); + } else { + periods.insert(TimePeriod(&bound, 0, true, false)); + } + } + break; + } + case Item_func::GE_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", (">= %s", stime.c_str())); +#endif + if ( tindex == 0 ) { + periods.insert(TimePeriod(&bound, 0, true, false)); + } else { + periods.insert(TimePeriod(0, &bound, false, true)); + } + + } + break; + } + case Item_func::GT_FUNC: { + if (bound != (uint64_t)-1) { +#ifndef NDEBUG + const Str stime(Str::fromTimestamp(bound)); + DBUG_PRINT("sparrow_context", ("> %s", stime.c_str())); +#endif + if ( tindex == 0 ) { + periods.insert(TimePeriod(&bound, 0, false, false)); + } else { + periods.insert(TimePeriod(0, &bound, false, false)); + } + } + break; + } + case Item_func::COND_AND_FUNC: { + List* list = static_cast(func)->argument_list(); + List_iterator li(*list); + Item* iitem; + while ((iitem = li++) != 0) { + if (periods.isEmpty()) { + periods = Condition::get(table, iitem, true); + } else { + DBUG_PRINT("sparrow_context", ("AND")); + periods.makeAnd(Condition::get(table, iitem, true)); + } + } + break; + } + case Item_func::COND_OR_FUNC: { + List* list = static_cast(func)->argument_list(); + List_iterator li(*list); + Item* iitem; + while ((iitem = li++) != 0) { + if (periods.isEmpty()) { + periods = Condition::get(table, li++, true); + } else { + DBUG_PRINT("sparrow_context", ("OR")); + periods.makeOr(Condition::get(table, iitem, true)); + } + } + break; + } + case Item_func::NOT_FUNC: { + DBUG_PRINT("sparrow_context", ("NOT")); + periods = Condition::get(table, func->arguments()[0], false); + break; + } + case Item_func::BETWEEN: { + if (bound != (uint64_t)-1) { + const uint64_t bound2 = getBound(args[2], isTimestamp); + if (bound2 != 0) { + TimePeriod period = TimePeriod(bound, bound2); +#ifndef NDEBUG + const 
Str speriod(Str::fromTimePeriod(period));
+          DBUG_PRINT("sparrow_context", ("BETWEEN %s", speriod.c_str()));
+#endif
+          periods.insert(period);
+        }
+      }
+      break;
+    }
+    case Item_func::IN_FUNC: {
+      if (bound != (uint64_t)-1) {
+        periods.insert(TimePeriod(bound));
+#ifndef NDEBUG
+        TimePeriod period(bound);
+        const Str speriod(Str::fromTimePeriod(period));
+        DBUG_PRINT("sparrow_context", ("IN %s", speriod.c_str()));
+#endif
+        for (int i = 2; i < static_cast(n); ++i) {
+          if (i != tindex) {
+            const uint64_t other = getBound(args[i], isTimestamp);
+            if (other != 0) {
+              periods.insert(TimePeriod(other));
+#ifndef NDEBUG
+              TimePeriod period(other);
+              const Str speriod(Str::fromTimePeriod(period));
+              DBUG_PRINT("sparrow_context", ("IN %s", speriod.c_str()));
+#endif
+            }
+          }
+        }
+      }
+      break;
+    }
+    case Item_func::FT_FUNC:
+    case Item_func::UNKNOWN_FUNC:
+    case Item_func::LIKE_FUNC:
+    case Item_func::ISNULL_FUNC:
+    case Item_func::ISNOTNULL_FUNC:
+    //case Item_func::COND_XOR_FUNC:
+    case Item_func::ISNOTNULLTEST_FUNC:
+    case Item_func::SP_EQUALS_FUNC:
+    case Item_func::SP_DISJOINT_FUNC:
+    case Item_func::SP_INTERSECTS_FUNC:
+    case Item_func::SP_TOUCHES_FUNC:
+    case Item_func::SP_CROSSES_FUNC:
+    case Item_func::SP_WITHIN_FUNC:
+    case Item_func::SP_CONTAINS_FUNC:
+    case Item_func::SP_OVERLAPS_FUNC:
+    case Item_func::SP_STARTPOINT:
+    case Item_func::SP_ENDPOINT:
+    case Item_func::SP_EXTERIORRING:
+    case Item_func::SP_POINTN:
+    case Item_func::SP_GEOMETRYN:
+    case Item_func::SP_INTERIORRINGN:
+    case Item_func::NOT_ALL_FUNC:
+    case Item_func::NOW_FUNC:
+    case Item_func::TRIG_COND_FUNC:
+    case Item_func::SUSERVAR_FUNC:
+    case Item_func::EXTRACT_FUNC:
+    case Item_func::TYPECAST_FUNC:
+    case Item_func::FUNC_SP:
+    case Item_func::UDF_FUNC:
+    default: break;
+    }
+    break;
+  }
+  case Item::SUM_FUNC_ITEM:
+  case Item::STRING_ITEM:
+  case Item::INT_ITEM:
+  case Item::REAL_ITEM:
+  case Item::NULL_ITEM:
+  case Item::VARBIN_ITEM:
+  case Item::METADATA_COPY_ITEM:
+  case Item::FIELD_AVG_ITEM:
+  case Item::DEFAULT_VALUE_ITEM:
+  case Item::PROC_ITEM:
+  case Item::REF_ITEM:
+  case Item::FIELD_STD_ITEM:
+  case Item::FIELD_VARIANCE_ITEM:
+  case Item::INSERT_VALUE_ITEM:
+  case Item::SUBSELECT_ITEM:
+  case Item::ROW_ITEM:
+  case Item::CACHE_ITEM:
+  case Item::TYPE_HOLDER:
+  case Item::PARAM_ITEM:
+  case Item::TRIGGER_FIELD_ITEM:
+  case Item::DECIMAL_ITEM:
+  case Item::XPATH_NODESET:
+  case Item::XPATH_NODESET_CMP:
+  case Item::VIEW_FIXER_ITEM:
+  default: break;
+  }
+  if (returnAllIfNone && periods.isEmpty()) {
+    // Nothing found: return ]-inf, +inf[.
+    periods.insert(TimePeriod());
+  }
+  return periods;
+}
+
+// STATIC
+uint64_t Condition::getBound(Item* item, const bool isTimestamp) {
+  Item_result res_type = item->result_type();
+  if (res_type == INT_RESULT) {
+    if (item->val_int() < 0) return -1;
+    return item->val_int() * 1000ULL;
+  } else if (res_type == STRING_RESULT) {
+    MYSQL_TIME t;
+    if (item->get_date(&t, TIME_FUZZY_DATE) == 0) {
+      THD* thd = current_thd;
+      int warning = 0;
+      my_timeval tm{0,0};
+      if (!datetime_with_no_zero_in_date_to_timeval(&t, *thd->time_zone(), &tm, &warning) || (warning & MYSQL_TIME_WARN_TRUNCATED)) {
Sparrow can't use partition pruning.", thd->query().str); + return -1; + } else if (warning != 0) { + spw_print_warning("Baddly formatted timestamp type in WHERE clause: %s: warning %d", thd->query().str, warning); + } + return tm.m_tv_sec * 1000ULL + tm.m_tv_usec/1000; // return timestamp in ms + } + } else { + [[maybe_unused]] THD* thd = current_thd; + spw_print_warning("Wrong timestamp type in WHERE clause: %s. Sparrow can't use partition pruning.", thd->query().str); + } + + return -1; +} + +} diff --git a/storage/sparrow/engine/condition.h b/storage/sparrow/engine/condition.h new file mode 100644 index 000000000000..4b5327895c06 --- /dev/null +++ b/storage/sparrow/engine/condition.h @@ -0,0 +1,53 @@ +/* + Analyzer for WHERE condition: find timestamp intervals. +*/ + +#ifndef _engine_condition_h_ +#define _engine_condition_h_ + +#include "types.h" + +struct TABLE; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TimePeriods +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TimePeriods : public SYSsortedVector { +public: + + TimePeriods(const uint32_t size = 0) : SYSsortedVector(size) { + } + + void makeAnd(const TimePeriods& right); + + void makeOr(const TimePeriod& right); + + void makeOr(const TimePeriods& right); + + void makeNot(); + + void compact(); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Condition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Condition { +private: + + static TimePeriods get(TABLE* table, Item* item, const bool returnAllIfNone); + + static uint64_t getBound(Item* item, const bool isTimestamp); + +public: + + static TimePeriods getPeriods(TABLE* table, Item* cond, const uint64_t lower); +}; + +} + +#endif /* #ifndef _engine_condition_h_ */ diff --git a/storage/sparrow/engine/context.cc b/storage/sparrow/engine/context.cc new file mode 100644 index 000000000000..f0fe8752c67b --- /dev/null +++ b/storage/sparrow/engine/context.cc @@ -0,0 +1,931 @@ +/* + Table handler context. +*/ + +#define MYSQL_SERVER 1 + +#include "context.h" +#include "../handler/hasparrow.h" +#include "condition.h" +#include "persistent.h" +#include "transient.h" +#include "internalapi.h" + +#include "sql/current_thd.h" +#include "sql/sql_class.h" +#include "sql/sql_lex.h" + +#include + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// QueryInfo +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void QueryInfo::clear(const bool resetContext) { + index_ = DATA_FILE; + dataMap_ = 0; + keyBuffer_.clear(); + currentKey_ = KeyValue(); + minKey_ = KeyValue(); + maxKey_ = KeyValue(); + keyInfo_ = 0; + indexHasTimestamps_ = false; + if (resetContext) { + snapshots_.clearAndDestroy(); + } +} + +QueryInfo::~QueryInfo() { + clear(true); +} + +void QueryInfo::update(TableShare& share, TABLE& table, const uint32_t index) { + clear(false); + + // In case of update, need to read all fields. 
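+ // Illustrative note: update() decides whether the chosen index is "covering" by building
+ // dataMap_, a bitmask with one bit per index column that the query actually reads. A
+ // minimal sketch of the idea, assuming a hypothetical index on table columns (3, 7) and a
+ // read set that touches only those two columns:
+ //
+ //   dataMap_ = 0;
+ //   dataMap_ |= (1 << 0);   // column 3 is index part 0 and is requested
+ //   dataMap_ |= (1 << 1);   // column 7 is index part 1 and is requested
+ //   // all requested fields were found in the index => covering, data file not read
+ //
+ // If any requested field is missing from the index, dataMap_ is reset to 0 further down.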
+ const bool forUpdate = !bitmap_is_clear_all(table.write_set); + mysqlIndex_ = index; + const Master& master = share.getMaster(); + index_ = master.getIndexId(index); + dataMap_ = 0; + indexHasTimestamps_ = false; + currentKey_ = KeyValue(); + minKey_ = KeyValue(); + maxKey_ = KeyValue(); + if (index_ != DATA_FILE) { + keyInfo_ = table.key_info + index; + + // Check if requested fields are all present in index ("covering index"). + // In this case, no need to read the data file. + MY_BITMAP* readSet = table.read_set; + uint count = forUpdate ? table.s->fields : bitmap_bits_set(readSet); + const ColumnIds& columnIds = master.getIndexes()[index_].getColumnIds(); + bool isCoveringIndex = false; + if (columnIds.length() >= count) { + const TableFields& fields = share.getFields(); + uint32_t f = 0; + for (uint32_t i = 0; i < fields.length(); ++i) { + const FieldBase* field = fields[i]; + if (field == 0 || !field->isMapped()) { + continue; + } + if (bitmap_is_set(readSet, f)) { + const uint32_t pos = columnIds.index(i); + if (pos != SYS_NPOS) { + dataMap_ |= (1 << pos); + if (--count == 0) { + isCoveringIndex = true; + break; + } + } + } + ++f; + } + } + if (!isCoveringIndex) { + dataMap_ = 0; + } + + // Initialize flag for QueryInfo::stopOnFirstMatch(). + const Columns& columns = master.getColumns(); + for (uint32_t i = 0; i < columnIds.length(); ++i) { + if (columns[columnIds[i]].getType() == COL_TIMESTAMP) { + indexHasTimestamps_ = true; + } + } + + // Allocate enough space for temporary key values. + const uint32_t keyLength = getKeyLength(); + keyBuffer_.resize(keyLength * 3); + keyBuffer_.forceLength(keyLength * 3); + uint8_t* keyBuffer = const_cast(keyBuffer_.data()); + currentKey_ = KeyValue(keyBuffer, HA_WHOLE_KEY); + minKey_ = KeyValue(keyBuffer + keyLength, HA_WHOLE_KEY); + maxKey_ = KeyValue(keyBuffer + keyLength * 2, HA_WHOLE_KEY); + } + updateIndirectors(); +} + +bool QueryInfo::snapshotTransientPartition(TransientPartition* partition) { + PartitionSnapshot* snapshot = partition->snapshot(); + if (snapshot == 0) { + return false; + } else { + assert(!snapshots_.contains(snapshot)); + snapshots_.insert(snapshot); + return true; + } +} + +void QueryInfo::updateIndirectors() { + SYSpHashIterator iterator(snapshots_); + while (++iterator) { + iterator.key()->updateIndirector(index_); + } +} + +const PartitionSnapshot* QueryInfo::getSnapshot(const TransientPartition* partition) const { + const PartitionSnapshot key(const_cast(partition)); + const PartitionSnapshot* snapshot = snapshots_.find(&key); + return snapshot; +} + +// Compares keys passed as parameters. 
+int QueryInfo::compareKeys(const TableFields& fields, const KeyValue& leftKey, const KeyValue& rightKey) const { + const KEY& keyInfo = getKeyInfo(); + const uint8_t* left = leftKey.getKey(); + const key_part_map leftMap = leftKey.getMap(); + const uint8_t* right = rightKey.getKey(); + const key_part_map rightMap = rightKey.getMap(); + for (uint32_t i = 0; i < keyInfo.user_defined_key_parts; ++i) { + const uint32_t bit = 1 << i; + if ((rightMap & bit) == 0 || (leftMap & bit) == 0) { + continue; + } + const KEY_PART_INFO& keyPartInfo = keyInfo.key_part[i]; + const FieldBase& field = *fields[keyPartInfo.fieldnr - 1]; + int cmp = 0; + if (keyPartInfo.null_bit != 0) { + const bool leftIsNull = (*left++ != 0); + const bool rightIsNull = (*right++ != 0); + if (leftIsNull) { + if (!rightIsNull) { + cmp = -1; + } else { + cmp = 0; + } + } else { + if (rightIsNull) { + cmp = 1; + } else { + cmp = field.compare(left, right); + } + } + } else { + cmp = field.compare(left, right); + } + if (cmp != 0) { + return cmp; + } + left += field.getLength(true); + right += field.getLength(true); + } + return 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Context +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Context::Context() : table_(0), share_(0), loaded_(false), writing_(false), altered_(false), + position_(UINT_MAX), indirectorIndex_(UINT_MAX), recordWrappers_(16), recordWrappersPerPartition_(16), insertRows_(0) { + SPARROW_ENTER("Context::Context"); +} + +void Context::reset() { + SPARROW_ENTER("Context::reset"); + partitions_.clear(); + mainPartitions_.clear(); + readers_.clear(); + positions_.clear(); + indirector_.clear(); + indirectorIndex_ = UINT_MAX; + keyValues_.clear(); + queryInfo_.clear(true); + loaded_ = false; + unusableIndexes_.clear(); + updatableColumns_.clear(); + recordWrappers_.clearAndDestroy(); + recordWrappersPerPartition_.clearAndDestroy(); +} + +void Context::initialize(TABLE* table, TableShare* share) { + SPARROW_ENTER("Context::initialize"); + table_ = table; + share_ = share; + reset(); + writing_ = false; +} + +Context::~Context() { + SPARROW_ENTER("Context::~Context"); + reset(); +} + +void Context::clone(const Context& context) { + reset(); + if (context.loaded_) { + partitions_ = context.partitions_; + mainPartitions_ = context.mainPartitions_; + readers_.initialize(context.partitions_.length()); + resetPosition(); + loaded_ = true; + queryInfo_.updateIndirectors(); + unusableIndexes_ = context.unusableIndexes_; + updatableColumns_ = context.updatableColumns_; + } +} + +void Context::resetPosition() { + position_ = Position(); + const uint32_t n = partitions_.length(); + positions_.clear(); + positions_.resize(n); + positions_.forceLength(n); + indirector_.clear(); + indirector_.resize(n); + indirector_.forceLength(n); + indirectorIndex_ = UINT_MAX; + const uint32_t length = queryInfo_.getKeyLength() * n; + keyValues_.reshape(length); +} + +PartitionReader* Context::getReader(const uint32_t partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException) { + return readers_.get(partition, static_cast(*partitions_[partition].get()), index, isString, hint); +} + +const RecordWrapper& Context::getRecordWrapper(const uint32_t alterSerial, const uint32_t index, const bool tree) { + const TableShare& share = getShare(); + if (alterSerial == share.getColumnAlterSerial()) { + return share.getRecordWrapper(index, 
tree); + } else { + const SerialRecordWrapper key(alterSerial, index, tree); + SerialRecordWrapper* wrapper = recordWrappers_.find(&key); + if (wrapper == 0) { + wrapper = share.createSerialRecordWrapper(getTable(), alterSerial, index, tree); + recordWrappers_.insert(wrapper); + } + return *wrapper; + } +} + +const RecordWrapper& Context::getRecordWrapper(const uint32_t alterSerial, const uint32_t partSerial, const ColumnIds& skippedColumnIds) { + const TableShare& share = getShare(); + const PartSerialRecordWrapper key(partSerial); + PartSerialRecordWrapper* wrapper = recordWrappersPerPartition_.find(&key); + if (wrapper == 0) { + wrapper = share.createPartSerialRecordWrapper(getTable(), alterSerial, partSerial, skippedColumnIds); + recordWrappersPerPartition_.insert(wrapper); + } + return *wrapper; +} + +// STATIC +Str Context::getQueryString() { + THD* thd = current_thd; + return Str(thd->query().str, static_cast(thd->query().length)); +} + +void Context::loadTimePeriods( TimePeriods& periods ) { + const Master& master = getShare().getMaster(); + uint64_t lower = 0; + { + // First get highest timestamp. + ReadGuard guard(master.getLock()); + const uint64_t newest = master.getNewest(); + const uint64_t defaultWhere = master.getDefaultWhere(); + if (newest != 0 && defaultWhere != 0) { + lower = newest - defaultWhere; + } + } + + THD* thd = current_thd; + Item* cond = thd->lex->current_query_block()->where_cond(); + periods = Condition::getPeriods(table_, cond, lower); +} + +/* Parses the current SQl statement to extract the time interval (if any) from the WHERE clause. + Deduce the covering list of partitions (put that list in partitions_): partition pruning mechanism. + If the WHERE clause does not specify any time interval, use the defaultWhere as default time interval. + That value is set at Master file level and set during table creation. If defaultWhere is 0, then look through all partitions. + Set the boolean flag loaded_ to true so that this parsing won't be done again for this SQL statement. + The SQL statement is found in the form of a pointer to a COND object in the MySQL THD structure + stored in the thread TLS, through thd->lex->current_select->prep_where +*/ +void Context::load() { + SPARROW_ENTER("Context::load"); + if (loaded_) { + return; + } +#ifndef NDEBUG + const Str query(getQueryString()); + const char* s = query.c_str(); + const char* p = s; + DBUG_LOCK; + DBUG_PRINT("sparrow_context", ("Query:")); + while (true) { + if (*s == 0) { + DBUG_PRINT("sparrow_context", ("%s", p)); + break; + } else if (*s == '\n') { + ptrdiff_t l = s - p; + const Str tmp(p, static_cast(l)); + DBUG_PRINT("sparrow_context", ("%s", tmp.c_str())); + p = s + 1; + } + s++; + } +#endif + partitions_.clear(); + readers_.clear(); + TimePeriods periods; + loadTimePeriods( periods ); + const Master& master = getShare().getMaster(); +#ifndef NDEBUG + Str speriods; +#endif + { + ReadGuard guard(master.getLock()); + + // Transform time periods into a list of partitions. + for (uint32_t i = 0; i < periods.length(); ++i) { + const TimePeriod& period = periods[i]; + master.getPartitionsForTimePeriod(period, partitions_, queryInfo_); +#ifndef NDEBUG + if (speriods.length() > 0) { + speriods += Str(", "); + } + speriods += Str::fromTimePeriod(period); +#endif + } + + // Get list of updatable columns: all except timestamp, strings and indexed columns. 
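+ // Worked example (illustrative, hypothetical schema): for a table
+ // (ts TIMESTAMP, src_ip INT, dst_port INT, bytes BIGINT) with an index on dst_port,
+ // the loops below first collect every non-string column except column 0 (the timestamp),
+ // i.e. {1, 2, 3}, and then remove the indexed column 2, leaving updatableColumns_ = {1, 3}.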
+ const Columns& columns = master.getColumns(); + for (uint32_t i = 0; i < columns.length(); ++i) { + if (i != 0 && !columns[i].isString()) { + updatableColumns_.append(i); + } + } + const Indexes& indexes = master.getIndexes(); + for (uint32_t i = 0; i < indexes.length(); ++i) { + const ColumnIds& ids = indexes[i].getColumnIds(); + for (uint32_t j = 0; j < ids.length(); ++j) { + updatableColumns_.remove(ids[j]); + } + } + + // Get additional main partitions referenced by partitions. + // Check alter status of partitions and list unusable (added) indexes. + unusableIndexes_.clear(); + const Alterations& alterations = master.getIndexAlterations(); + const uint32_t nbAlterations = alterations.length(); + const uint32_t nbPartitions = partitions_.length(); + for (uint32_t i = 0; i < nbPartitions; ++i) { + PartitionGuard& p = partitions_[i]; + if (!p->isTransient()) { + PersistentPartition& partition = static_cast(*p); + PersistentPartition* mainPartition = partition.getMainPartition(); + if (mainPartition != 0) { + PartitionGuard mguard(mainPartition); + if (!partitions_.contains(mguard)) { + mainPartitions_.insertIfAbsent(mguard); + } + } + } + if (!p->isReady()) { + for (uint32_t j = 0; j < nbAlterations; ++j) { + const Alteration& alteration = alterations[j]; + if (alteration.getType() != ALT_ADD_INDEX + || alteration.getSerial() <= p->getIndexAlterSerial()) { + continue; + } + const uint32_t index = alteration.getId(); + unusableIndexes_.insertIfAbsent(index); + } + } + } + } + readers_.initialize(partitions_.length()); + resetPosition(); + loaded_ = true; + queryInfo_.updateIndirectors(); + + // Make sure MySQL will not use an index being created. + TABLE& table = getTable(); + for (uint32_t i = 0; i < unusableIndexes_.length(); ++i) { + const int bit = master.getMySqlIndexId(unusableIndexes_[i]); + table.keys_in_use_for_query.clear_bit(bit); + table.keys_in_use_for_group_by.clear_bit(bit); + table.keys_in_use_for_order_by.clear_bit(bit); + table.covering_keys.clear_bit(bit); + table.quick_keys.clear_bit(bit); + table.merge_keys.clear_bit(bit); + } +#ifndef NDEBUG + DBUG_PRINT("sparrow_context", ("Table %s.%s: loaded %u partitions into query context for %s", master.getDatabase().c_str(), + master.getTable().c_str(), partitions_.length(), speriods.c_str())); +#endif +} + +// Provide statistics to MySQL. 
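+// Illustrative sketch of the expected call chain (the context_ member name is an
+// assumption, not taken from the handler sources): MySQL's handler::info() is forwarded
+// here to fill handler::stats.
+//
+//   int SparrowHandler::info(uint flag) {
+//     context_->getStats(stats, flag);   // stats is the handler's ha_statistics member
+//     return 0;
+//   }
+//
+// HA_STATUS_VARIABLE fills the row and size counters, HA_STATUS_CONST the creation time and
+// per-key statistics, HA_STATUS_TIME and HA_STATUS_AUTO the update time and auto-increment.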
+void Context::getStats(ha_statistics& stats, const uint flag) { + const Master& master = getShare().getMaster(); + TABLE& table = getTable(); + if (flag & HA_STATUS_VARIABLE) { + uint64_t records = 0; + uint64_t dataSize = 0; + uint64_t indexSize = 0; + TimePeriods periods; + loadTimePeriods( periods ); + if ( periods[0] == TimePeriod() ) { + ReadGuard guard(master.getLock()); + records = master.getRecords() + master.getTransientRecords(); + dataSize = master.getDataSize(); + indexSize = master.getIndexSize(); + } else { + load(); + SYSsortedVector mainSerials; + for (uint32_t i = 0; i < partitions_.length(); ++i) { + const Partition& partition = *partitions_[i].get(); + if (partition.isTransient()) { + records += partition.getRecords(); + } else { + const PersistentPartition& persistentPartition = static_cast(partition); + const PersistentPartition& mainPartition = *persistentPartition.getMainPartition(); + if (mainSerials.insertIfAbsent(mainPartition.getSerial())) { + records += mainPartition.getDataRecords(); + dataSize += mainPartition.getDataSize(); + indexSize += mainPartition.getIndexSize(); + } + } + } + } + stats.records = static_cast(records); + stats.deleted = 0; + stats.data_file_length = dataSize; + stats.index_file_length = indexSize; + stats.delete_length = 0; + if (records > 0) { + stats.mean_rec_length = static_cast(stats.data_file_length / records); + } else { + stats.mean_rec_length = 0; + } + stats.check_time = static_cast(0); + } + // See mysql\include\my_base.h:766. + if (flag & HA_STATUS_CONST) { + ReadGuard guard(master.getLock()); + stats.create_time = static_cast(master.getTimeCreated()); + stats.max_data_file_length = 0; + stats.max_index_file_length = 0; + stats.block_size = 0; + for (uint i = 0; i < table.s->keys; ++i) { + for (uint j = 0; j < table.key_info[i].user_defined_key_parts; ++j) { + table.key_info[i].rec_per_key[j]= 0; + } + } + } + if (flag & HA_STATUS_TIME) { + ReadGuard guard(master.getLock()); + stats.update_time = static_cast(master.getTimeUpdated()); + } + if (flag & HA_STATUS_AUTO) { + ReadGuard guard(master.getLock()); + stats.auto_increment_value = static_cast(master.getAutoInc()); + } +} + +// Moves to next row. +bool Context::moveNext(uint8_t* buffer) { + load(); + while (true) { + try { + if (position_.isValid()) { + const uint32_t partition = position_.getPartition(); + position_ = partitions_[partition]->moveNext(*this, position_); + if (!position_.isValid()) { + if (partition + 1 < partitions_.length()) { + position_ = partitions_[partition + 1]->moveFirst(*this, partition + 1); + } + } + } else if (!partitions_.isEmpty()) { + position_ = partitions_[0]->moveFirst(*this, 0); + } + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::largeForward2_); + } catch(const SparrowException&) { + // Ignore error and retry. + } + } +} + +// Moves to previous row. 
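+// Usage sketch (illustrative only): a plain forward table scan from the handler layer
+// reduces to repeated calls to moveNext() until it returns false, e.g.
+//
+//   while (context.moveNext(table->record[0])) {
+//     // one row is now available in MySQL record format
+//   }
+//
+// movePrevious() below is the symmetric primitive for backward scans.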
+bool Context::movePrevious(uint8_t* buffer) { + load(); + while (true) { + try { + if (position_.isValid()) { + const uint32_t partition = position_.getPartition(); + position_ = partitions_[partition]->movePrevious(*this, position_); + if (!position_.isValid()) { + if (partition > 0) { + position_ = partitions_[partition - 1]->moveLast(*this, partition - 1); + } + } + } else if (!partitions_.isEmpty()) { + const uint32_t partition = partitions_.length() - 1; + position_ = partitions_[partition]->moveLast(*this, partition); + } + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::largeBackward2_); + } catch(const SparrowException&) { + // Ignore error and retry. + } + } +} + +// Moves to absolute position in data files and read record. +bool Context::moveAbsolute(const uint64_t position, uint8_t* buffer) { + load(); + try { + const Position restoredPos = restorePosition(position); + if (restoredPos.isValid()) { + position_ = restoredPos; + return partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } else { + return false; + } + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +// Sets the active index. If MAX_KEY, there is no active index. +void Context::setActiveIndex(const uint index) { + SPARROW_ENTER("Context::setActiveIndex"); + queryInfo_.update(getShare(), getTable(), index); + resetPosition(); +} + +// Finds and reads the record using the current index and the given key information. +bool Context::findRecord(const KeyValue& key, const enum ha_rkey_function findFlag, uint8_t* buffer) { + SPARROW_ENTER("Context::findRecord"); + load(); + resetPosition(); + const SearchFlag searchFlag(findFlag); + try { + const uint32_t nbPartitions = partitions_.length(); + for (uint32_t partition = 0; partition < nbPartitions; ++partition) { + Position& pos = positions_[partition]; + const PartitionGuard p = partitions_[partition]; + if (p.get() == NULL) { + throw SparrowException::create(false, "NULL partition in context."); + } + pos = partitions_[partition]->indexFind(*this, partition, key, searchFlag); + if (pos.isValid() && !partitions_[partition]->readKey(*this, pos, true, HA_WHOLE_KEY, keyValue(partition), true)) { + return false; + } + indirector_[partition] = partition; + } + sortKeyValues(); + if (searchFlag == SearchFlag::EQ || searchFlag == SearchFlag::GE || searchFlag == SearchFlag::GT) { + for (uint32_t i = 0; i < nbPartitions; ++i) { + const Position& pos = positions_[indirector_[i]]; + if (pos.isValid()) { + indirectorIndex_ = i; + position_ = pos; + break; + } + } + } else { + indirectorIndex_ = nbPartitions - 1; + position_ = positions_[indirector_[indirectorIndex_]]; + } + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +template class Sort; + +// Sort key values for all partitions, in ascending order. +// Upon return, the context's indirector gives the partition number for each value. +// Invalid positions (no record found) come first in the indirector array. +void Context::sortKeyValues() { + const KeyComparator comparator(*this); + Sort::quickSort(indirector_, comparator, 0, partitions_.length()); +} + +// Finds first record for current index. 
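+// Worked example (illustrative): with three partitions whose smallest key values for the
+// active index are 17, 4 and 23, findFirstRecord() positions each partition on its own
+// first entry, sorts the indirector by key ([1, 0, 2] here), sets indirectorIndex_ to 0 and
+// returns the row with key 4 from partition 1, i.e. the head of a k-way merge.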
+bool Context::findFirstRecord(uint8_t* buffer) { + SPARROW_ENTER("Context::findFirstRecord"); + load(); + resetPosition(); + if (partitions_.isEmpty()) { + return false; + } + try { + // Save first position for all partitions. + uint32_t partition = 0; + while (partition < partitions_.length()) { + Position& position = positions_[partition]; + position = partitions_[partition]->indexFirst(*this, partition); + if (position.isValid()) { + if (!partitions_[partition]->readKey(*this, position, true, HA_WHOLE_KEY, keyValue(partition), true)) { + return false; + } + indirector_[partition] = partition; + partition++; + } else { + // Discard invalid partition. + partitions_.removeAt(partition); + positions_.removeAt(partition); + indirector_.removeAt(partition); + if (partitions_.isEmpty()) { + return false; + } + } + } + sortKeyValues(); + indirectorIndex_ = 0; + position_ = positions_[indirector_[0]]; + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +// Finds last record for current index. +bool Context::findLastRecord(uint8_t* buffer) { + SPARROW_ENTER("Context::findLastRecord"); + load(); + resetPosition(); + if (partitions_.isEmpty()) { + return false; + } + try { + // Save last position for all partitions. + uint32_t partition = 0; + while (partition < partitions_.length()) { + Position& position = positions_[partition]; + position = partitions_[partition]->indexLast(*this, partition); + if (position.isValid()) { + if (!partitions_[partition]->readKey(*this, position, true, HA_WHOLE_KEY, keyValue(partition), true)) { + return false; + } + indirector_[partition] = partition; + partition++; + } else { + // Discard invalid partition. + partitions_.removeAt(partition); + positions_.removeAt(partition); + indirector_.removeAt(partition); + if (partitions_.isEmpty()) { + return false; + } + } + } + sortKeyValues(); + indirectorIndex_ = partitions_.length() - 1; + position_ = positions_[indirector_[indirectorIndex_]]; + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +// Finds next record for current index. 
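+// Worked example (illustrative): continuing the merge above, the current row came from
+// partition 1 (key 4) and the other candidates are 17 and 23. findNextRecord() advances
+// partition 1 to its next key, say 20, re-inserts it into the sorted indirector (candidate
+// order 17, 20, 23) and returns the row with key 17. When a partition is exhausted, its
+// invalid position is moved to the front of the indirector and skipped.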
+bool Context::findNextRecord(uint8_t* buffer) { + SPARROW_ENTER("Context::findNextRecord"); + load(); + if (partitions_.isEmpty() || !position_.isValid()) { + return false; + } + try { + const uint32_t partition = position_.getPartition(); + Position& position = positions_[partition]; + if (position_.hasIntervalHint() && position_.getIndexHint() != position_.getEndHint()) { + position = partitions_[partition]->indexNext(*this, position); + position_ = position; + } else { + position = partitions_[partition]->indexNext(*this, position); + if (position.isValid() && !partitions_[partition]->readKey(*this, position, true, HA_WHOLE_KEY, keyValue(partition), true)) { + return false; + } + updateIndirector(position); + position_ = Position(); + const uint32_t nbPartitions = partitions_.length(); + for (uint32_t i = 0; i < nbPartitions; ++i) { + const Position& pos = positions_[indirector_[i]]; + if (pos.isValid()) { + indirectorIndex_ = i; + position_ = pos; + break; + } + } + } + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +// Finds previous record for current index. +bool Context::findPreviousRecord(uint8_t* buffer) { + SPARROW_ENTER("Context::findPreviousRecord"); + load(); + if (partitions_.isEmpty() || !position_.isValid()) { + return false; + } + try { + const uint32_t partition = position_.getPartition(); + Position& position = positions_[partition]; + if (position_.hasIntervalHint() && position_.getIndexHint() != position_.getStartHint()) { + position = partitions_[partition]->indexPrevious(*this, position); + position_ = position; + } else { + position = partitions_[partition]->indexPrevious(*this, position); + if (position.isValid() && !partitions_[partition]->readKey(*this, position, false, HA_WHOLE_KEY, keyValue(partition), true)) { + return false; + } + updateIndirector(position); + indirectorIndex_ = partitions_.length() - 1; + position_ = positions_[indirector_[indirectorIndex_]]; + } + return position_.isValid() && partitions_[position_.getPartition()]->readData(*this, position_, buffer, BlockCacheHint::smallAround2_); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +template class BinarySearch; + +// Update indirector for index scan. 
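+// Worked example (illustrative): let indirector_ = [2, 0, 1] with keys 10, 12, 15 and
+// indirectorIndex_ = 0, i.e. the current row came from partition 2 with key 10. After
+// partition 2 advances to key 13, its slot is removed, a binary search over the remaining
+// candidates (12, 15) finds the insertion point, and the entry is re-inserted, giving
+// [0, 2, 1] (keys 12, 13, 15). If partition 2 had no more rows, its entry would instead be
+// moved to the front so that invalid positions stay grouped before the valid ones.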
+void Context::updateIndirector(const Position& position) { + const uint32_t save = indirector_[indirectorIndex_]; + uint32_t* data = const_cast(indirector_.data()); + const uint32_t nbPartitions = partitions_.length() - 1; + if (position.isValid()) { + SearchKeyComparator comparator(*this, position); + const uint32_t insertionPoint = BinarySearch::find(comparator, 0, nbPartitions, SearchFlag::GE); + memmove(data + indirectorIndex_, data + indirectorIndex_ + 1, sizeof(data[0]) * (nbPartitions - indirectorIndex_)); + if (insertionPoint == UINT_MAX || insertionPoint == nbPartitions) { + data[nbPartitions] = save; + } else { + memmove(data + insertionPoint + 1, data + insertionPoint, sizeof(data[0]) * (nbPartitions - insertionPoint)); + data[insertionPoint] = save; + } + } else { + memmove(data + indirectorIndex_, data + indirectorIndex_ + 1, sizeof(data[0]) * (nbPartitions - indirectorIndex_)); + memmove(data + 1, data, sizeof(data[0]) * nbPartitions); + data[0] = save; + } +} + +uint64_t Context::recordsTotal() { + const Master& master = getShare().getMaster(); + ReadGuard guard(master.getLock()); + return master.getRecords() + master.getTransientRecords(); +} + +// Returns an approximated number of records in the given range. +// CAUTION: if this method returns 0, MySQL will assume there is no row in the range! +// So make sure the returned result is either exact or over-estimated. +uint64_t Context::recordsInRange(const uint index, const key_range* minKey, const key_range* maxKey) { + SPARROW_ENTER("Context::recordsInRange"); + load(); + const uint32_t nbPartitions = partitions_.length(); + if (nbPartitions == 0) { + return 0; + } + try { + ChangeIndexGuard guard(*this, index); + + // Is the index unusable? + if (unusableIndexes_.contains(queryInfo_.getIndex())) { + return HA_POS_ERROR - 1; + } + const uint32_t maxPartitions = 10; // Do not scan all partitions. + const double ratio = static_cast(maxPartitions) / nbPartitions; + const bool useRatio = nbPartitions > maxPartitions; + uint64_t count = 0; + uint32_t pos = UINT_MAX; + uint32_t scannedPartitions = 0; + for (uint32_t i = 0; i < nbPartitions; ++i) { + const uint32_t newPos = useRatio ? static_cast(i * ratio) : i; + if (newPos == pos) { + continue; + } + pos = newPos; + scannedPartitions++; + const PartitionGuard& partition = partitions_[pos]; + count += partition->recordsInRange(*this, pos, minKey, maxKey); + } + count = static_cast(count * static_cast(nbPartitions) / scannedPartitions); + count = (count * sparrow_index_cost_percentage) / 100; // MySQL seems to favor full table scans. See ha_innobase::info(). 
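+ // Worked example (illustrative): with 50 matching partitions only about 10 are sampled
+ // (ratio 0.2). If those 10 report 1,200 rows in the range, the estimate is extrapolated
+ // to 1,200 * 50 / 10 = 6,000 rows, then scaled by sparrow_index_cost_percentage (a value
+ // of 120, for instance, would give 7,200) and finally clamped below by 2 so MySQL never
+ // concludes that the range is empty.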
+ return std::max(uint64_t{2}, count); + } catch(const SparrowException& e) { + e.toLog(); + return 0; + } +} + +void Context::writeLock() { + writing_ = true; + getShare().getMaster().startUpdate(); +} + +void Context::unlock() { + if (writing_) { + getShare().getMaster().endUpdate(); + writing_ = false; + } +} + +bool Context::updateRecord(const uint8_t* buffer) { + SPARROW_ENTER("Context::updateRecord"); + try { + return position_.isValid() && partitions_[position_.getPartition()]->updateData(*this, position_, buffer); + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +void Context::startInsert(const uint32_t rows) { + SPARROW_ENTER("Context::startInsert"); + insertBuffer_.clear(); + insertRows_ = rows; +} + +bool Context::insertRecord(const uint8_t* buffer) { + SPARROW_ENTER("Context::insertRecord"); + try { + const TableFields& fields = getShare().getMappedFields(); + const uint32_t n = fields.length(); + for (uint32_t i = 0; i < n; ++i) { + fields[i]->insertTransform(buffer, insertBuffer_); + } + + if(insertBuffer_.position() >= sparrow_direct_insertion_threshold) { + // InsertBuffer_ is big enough. Send its content to sparrow + const Master& master = getShare().getMaster(); + // We have to save the current allocated size of the insertBuffer_ + const uint64_t realLimit = insertBuffer_.limit(); + // Limit the buffer to what is already written + insertBuffer_.limit(insertBuffer_.position()); + // And put the cursor at the beginning. + insertBuffer_.position(0); + InternalApi::write(master.getDatabase().c_str(), master.getTable().c_str(), insertBuffer_, insertRows_); + // Once the data are given to sparrow, we could restore the previous buffer length + insertBuffer_.limit(realLimit); + // And rewind to the beginning for writing the new data over the old ones. 
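+ // Illustrative trace of the limit/position protocol used here: with an allocated limit of
+ // 64 KB and 48 KB already written (position = 48 KB >= threshold), the code sets
+ // limit = 48 KB and position = 0 so InternalApi::write() sees exactly the filled bytes,
+ // then restores limit = 64 KB and rewinds position to 0 so the next rows overwrite the
+ // buffer from the start.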
+ insertBuffer_.position(0); + } + + return true; + } catch(const SparrowException& e) { + e.toLog(); + return false; + } +} + +bool Context::endInsert() { + SPARROW_ENTER("Context::endInsert"); + try { + const Master& master = getShare().getMaster(); + insertBuffer_.limit(insertBuffer_.position()); + insertBuffer_.position(0); + InternalApi::write(master.getDatabase().c_str(), master.getTable().c_str(), insertBuffer_, insertRows_); + insertBuffer_.clear(); + return true; + } catch(const SparrowException& e) { + insertBuffer_.clear(); + e.toLog(); + return false; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// KeyComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +KeyComparator::KeyComparator(Context& context) : context_(context), queryInfo_(context.getQueryInfo()), fields_(context.getShare().getMappedFields()) { +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SearchKeyComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SearchKeyComparator::SearchKeyComparator(Context& context, const Position& position) + : context_(context), queryInfo_(context.getQueryInfo()), fields_(context.getShare().getMappedFields()), + keyValue_(context.keyValue(position.getPartition()), HA_WHOLE_KEY) { + assert(position.isValid()); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReaderGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +PartitionReaderGuard::PartitionReaderGuard(const PersistentPartition& partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException) + : reader_(partition.createReader(index, isString, hint)), owned_(true) { +} + +} diff --git a/storage/sparrow/engine/context.h b/storage/sparrow/engine/context.h new file mode 100644 index 000000000000..75638b351854 --- /dev/null +++ b/storage/sparrow/engine/context.h @@ -0,0 +1,430 @@ +/* + Table handler context. 
+*/ + +#ifndef _engine_context_h_ +#define _engine_context_h_ + +#include "sql/handler.h" + +#include "../handler/field.h" +#include "partition.h" +#include "list.h" +#include "fileutil.h" +#include "sort.h" +#include "condition.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// QueryInfo +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SparrowHandler; +class PartitionSnapshot; +class TransientPartition; +class TableShare; +class QueryInfo { +private: + + uint32_t mysqlIndex_; + uint32_t index_; + const KEY* keyInfo_; + SYSvector keyBuffer_; + KeyValue currentKey_; + KeyValue minKey_; + KeyValue maxKey_; + key_part_map dataMap_; + bool indexHasTimestamps_; + SYSpHash snapshots_; + +private: + + QueryInfo(const QueryInfo& right); + QueryInfo& operator = (const QueryInfo& right); + +public: + + void clear(const bool resetContext); + + QueryInfo() : snapshots_(4) { + clear(false); + } + + ~QueryInfo(); + + void update(TableShare& share, TABLE& table, const uint32_t index); + + uint32_t getMySQLIndex() const { + return mysqlIndex_; + } + + uint32_t getIndex() const { + return index_; + } + + bool isCoveringIndex() const { + return dataMap_ != 0; + } + + key_part_map getDataMap() const { + return dataMap_; + } + + const KEY& getKeyInfo() const { + return *keyInfo_; + } + + const KeyValue& getCurrentKey() const { + return currentKey_; + } + + const KeyValue& getMinKey() const { + return minKey_; + } + + const KeyValue& getMaxKey() const { + return maxKey_; + } + + KeyValue& getCurrentKey() { + return currentKey_; + } + + KeyValue& getMinKey() { + return minKey_; + } + + KeyValue& getMaxKey() { + return maxKey_; + } + + uint32_t getKeyLength() const { + return keyInfo_ == 0 ? 0 : keyInfo_->key_length; + } + + void saveKey(key_part_map map); + + void saveKey(KeyValue& keyValue) const; + + bool stopOnFirstMatch(const KeyValue& keyValue) const { + // While searching indexes, we can stop on first match only if the key is complete + // (i.e. it uses all index columns) AND the current index does not contain any + // TIMESTAMP key, because we store milliseconds and MySQL has only seconds. + + // TODO remove the check on TIMESTAMP key when MySQL supports milliseconds. + // (see http://bugs.mysql.com/bug.php?id=8523). 
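+ // Illustrative note: getMap() is a key_part_map with one bit per key part present in the
+ // search key. For an index with 3 user-defined key parts a complete key has map
+ // 0b111 = 7, and 7 + 1 == (1 << 3), which is exactly the test below; a partial key such
+ // as 0b011 = 3 fails it, so the scan cannot stop on the first match.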
+ //return !indexHasTimestamps_ && (keyValue.getMap() + 1 == static_cast(1 << getKeyInfo().user_defined_key_parts)); + return (keyValue.getMap() + 1 == static_cast(1 << getKeyInfo().user_defined_key_parts)); + } + + void updateIndirectors(); + + bool snapshotTransientPartition(TransientPartition* partition); + + const PartitionSnapshot* getSnapshot(const TransientPartition* partition) const; + + const SYSpHash& getSnapshots() const { + return snapshots_; + } + + int compareKeys(const TableFields& fields, const KeyValue& leftKey, const KeyValue& rightKey) const; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// KeyIndirector +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSvector KeyIndirector; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Context +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class KeyComparator; +class SearchKeyComparator; +typedef SYSarray KeyValues; +class TableShare; +class RecordWrapper; +class SerialRecordWrapper; +class PartSerialRecordWrapper; +typedef SYSsortedVector Ids; +typedef GrowingByteBuffer InsertBuffer; +class Context { + friend class KeyComparator; + friend class SearchKeyComparator; + +private: + + TABLE* table_; // MySQL Table object. + TableShare* share_; + + bool loaded_; + bool writing_; + bool altered_; + + ReferencedPartitions partitions_; // Partitions loaded in this context. + ReferencedPartitions mainPartitions_; // Additional main partitions referenced by partitions_. + PartitionReaders readers_; // Multiple readers per partition. + + Ids unusableIndexes_; // Indexes being added (alter) at the time this context is loaded. + Ids updatableColumns_; + + QueryInfo queryInfo_; // Query information. + Position position_; // Current position. + Positions positions_; // Candidate positions for each partition when scanning indexes. + KeyValues keyValues_; // Index key values for each position. + KeyIndirector indirector_; // Indirector to have sorted key values. + uint32_t indirectorIndex_; // Index in indirector for position_. 
+ + SYSpHash recordWrappers_; + SYSpHash recordWrappersPerPartition_; + + uint32_t insertRows_; + InsertBuffer insertBuffer_; + +protected: + + void load(); + void loadTimePeriods(TimePeriods& periods); + + uint8_t* keyValue(const uint32_t partition) { + return keyValues_.data() + partition * queryInfo_.getKeyLength(); + } + + void sortKeyValues(); + + void updateIndirector(const Position& position); + +public: + + Context(); + + void initialize(TABLE* table, TableShare* share); + + ~Context(); + + void clone(const Context& context); + + void reset(); + + void getStats(ha_statistics& stats, const uint flag); + + const ReferencedPartitions& getPartitions() const { + return partitions_; + } + + ReferencedPartitions& getPartitions() { + return partitions_; + } + + QueryInfo& getQueryInfo() { + return queryInfo_; + } + + const QueryInfo& getQueryInfo() const { + return queryInfo_; + } + + TABLE& getTable() { + return *table_; + } + + const TableShare& getShare() const { + return *share_; + } + + TableShare& getShare() { + return *share_; + } + + PartitionReader* getReader(const uint32_t partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException); + + bool isUpdatableColumn(const uint32_t id) const { + return updatableColumns_.contains(id); + } + + uint64_t savePosition() const { + return (static_cast(position_.getPartition()) << 32) | static_cast(position_.getRow()); + } + + Position restorePosition(const uint64_t pos) const { + return Position(static_cast(pos >> 32), static_cast(pos)); + } + + static Str getQueryString(); + + void resetPosition(); + + bool moveNext(uint8_t* buffer); + + bool movePrevious(uint8_t* buffer); + + bool moveAbsolute(const uint64_t position, uint8_t* buffer); + + void setActiveIndex(const uint index); + + bool findRecord(const KeyValue& key, const enum ha_rkey_function findFlag, uint8_t* buffer); + + bool findFirstRecord(uint8_t* buffer); + + bool findLastRecord(uint8_t* buffer); + + bool findNextRecord(uint8_t* buffer); + + bool findPreviousRecord(uint8_t* buffer); + + uint64_t recordsTotal(); + + uint64_t recordsInRange(const uint index, const key_range* minKey, const key_range* maxKey); + + const RecordWrapper& getRecordWrapper(const uint32_t serial, const uint32_t index, const bool tree); + + const RecordWrapper& getRecordWrapper(const uint32_t alterSerial, const uint32_t partSerial, const ColumnIds& skippedColumns); + + void writeLock(); + + bool updateRecord(const uint8_t* buffer); + + void unlock(); + + void startInsert(const uint32_t rows); + + bool insertRecord(const uint8_t* buffer); + + bool endInsert(); + + void setAltered(bool altered) { altered_ = altered; } + bool getAltered() const { return altered_; } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// KeyComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class KeyComparator { +private: + + Context& context_; + const QueryInfo& queryInfo_; + const TableFields& fields_; + +public: + + KeyComparator(Context& context); + + int compare(const uint32_t row1, const uint32_t row2) const { + const Position& pos1 = context_.positions_[row1]; + const Position& pos2 = context_.positions_[row2]; + if (!pos1.isValid()) { + return pos2.isValid() ? 
-1 : 0; + } + if (!pos2.isValid()) { + return 1; + } + const KeyValue keyValue1(context_.keyValue(row1), HA_WHOLE_KEY); + const KeyValue keyValue2(context_.keyValue(row2), HA_WHOLE_KEY); + const int cmp = queryInfo_.compareKeys(fields_, keyValue1, keyValue2); + if (cmp == 0) { + return row1 > row2 ? 1 : (row1 < row2 ? -1 : 0); + } else { + return cmp; + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SearchKeyComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SearchKeyComparator { +private: + + Context& context_; + const QueryInfo& queryInfo_; + const TableFields& fields_; + const KeyValue keyValue_; + +public: + + SearchKeyComparator(Context& context, const Position& position); + + int compareTo(const uint32_t row) _THROW_(SparrowException) { + const uint32_t partition = context_.indirector_[row >= context_.indirectorIndex_ ? row + 1 : row]; + const Position& pos = context_.positions_[partition]; + if (!pos.isValid()) { + return -1; + } + return queryInfo_.compareKeys(fields_, KeyValue(context_.keyValue(partition), HA_WHOLE_KEY), keyValue_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReaderGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartitionReaderGuard { +private: + + PartitionReader* reader_; + const bool owned_; + +public: + + // For query processing. + PartitionReaderGuard(Context& context, const uint32_t partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException) + : reader_(context.getReader(partition, index, isString, hint)), owned_(false) { + } + + // For coalescing and index alteration. + PartitionReaderGuard(const PersistentPartition& partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException); + + // For coalescing. + PartitionReaderGuard(PartitionReader* reader) : reader_(reader), owned_(false) { + } + + ~PartitionReaderGuard() { + reader_->release(); + if (owned_) { + delete reader_; + } + } + + PartitionReader& get() { + return *reader_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ChangeIndexGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ChangeIndexGuard { +private: + + Context& context_; + uint32_t savedMySqlIndex_; + +public: + + ChangeIndexGuard(Context& context, const uint index) : context_(context) { + QueryInfo& queryInfo = context.getQueryInfo(); + if (queryInfo.getMySQLIndex() == index) { + savedMySqlIndex_ = UINT_MAX; + } else { + savedMySqlIndex_ = queryInfo.getMySQLIndex(); + context_.setActiveIndex(index); + } + } + + ~ChangeIndexGuard() { + if (savedMySqlIndex_ != UINT_MAX) { + context_.setActiveIndex(savedMySqlIndex_); + } + } + +}; + +} + +#endif /* #ifndef _engine_context_h_ */ diff --git a/storage/sparrow/engine/exception.h b/storage/sparrow/engine/exception.h new file mode 100644 index 000000000000..f385f00ffdd2 --- /dev/null +++ b/storage/sparrow/engine/exception.h @@ -0,0 +1,66 @@ +/* + Sparrow exception. 
+*/ + +#ifndef _engine_exception_h_ +#define _engine_exception_h_ + +#include "my_compiler.h" + +namespace Sparrow { + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN32 +// Disable warning regarding exception specification. +#pragma warning(disable:4290) +#endif + +#ifdef __GNUG__ + #if __GNUC__ >= 8 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#elif defined(_MSC_VER) + #if _MSC_VER >= 1800 + #define _THROW_(a) + #else + #define _THROW_(a) throw(a) + #endif +#else + #define _THROW_(a) throw(a) +#endif + +class SparrowException { +private: + + const bool logged_; + unsigned int err_code_; + char buffer_[1024]; + +public: + + SparrowException(const char* text, const bool logged = true, unsigned int err_code=-1); + static SparrowException create(const bool addError, const char* format, ...) MY_ATTRIBUTE((format(printf, 2, 3))); + bool isLogged() const { + return logged_; + } + const char* getText() const { + return buffer_; + } + void toLog() const; + unsigned int get_err_code() const { + return err_code_; + } + void set_err_code(unsigned int err_code) { + err_code_ = err_code; + } +}; + +} + +#endif /* #ifndef _engine_exception_h_ */ diff --git a/storage/sparrow/engine/fileutil.cc b/storage/sparrow/engine/fileutil.cc new file mode 100644 index 000000000000..8016c03a289b --- /dev/null +++ b/storage/sparrow/engine/fileutil.cc @@ -0,0 +1,963 @@ +/* + File utilities. +*/ + +#include "fileutil.h" +#include "cache.h" +#include "persistent.h" +#include "internalapi.h" +#include "purge.h" + +#include +#include +#include +#include "../engine/log.h" +#include "sql/mysqld.h" + + +#ifdef _WIN32 +#include "winioctl.h" +#endif + +#ifndef _WIN32 +#include +#include +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileUtil +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN32 +const char FileUtil::separator_ = '\\'; +#else +const char FileUtil::separator_ = '/'; +#endif +uint32_t FileUtil::sectorSize_; +uint32_t FileUtil::pageSize_; +Filesystems FileUtil::filesystems_; +FilesystemIds FileUtil::filesystemIds_; +Filesystems FileUtil::coalescingFilesystems_; +FilesystemIds FileUtil::coalescingFilesystemIds_; +Filesystems FileUtil::allFilesystems_; +Lock FileUtil::lock_(true, "FileUtil::lock_"); +struct rand_struct FileUtil::rnd_; + +// Miscellaneous initializations. +void FileUtil::initialize() _THROW_(SparrowException) { +#ifdef _WIN32 + // Gets the sector size of the MySQL data directory. 
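+ // Illustrative note: the sector size matters because unbuffered/direct I/O requires
+ // offsets and transfer sizes aligned on it. A sketch of the usual rounding, assuming a
+ // power-of-two sector size (hypothetical helper, not defined in this file):
+ //
+ //   uint64_t roundUpToSector(uint64_t size) {
+ //     return (size + sectorSize_ - 1) & ~static_cast<uint64_t>(sectorSize_ - 1);
+ //   }
+ //
+ // e.g. with a 4096-byte physical sector, a 10,000-byte write is padded to 12,288 bytes.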
+ DWORD sectorsPerCluster, bytesPerSector, freeClusters, totalClusters; + if (GetDiskFreeSpace(mysql_real_data_home, §orsPerCluster, &bytesPerSector, + &freeClusters, &totalClusters) != 0) { + sectorSize_ = static_cast(bytesPerSector); + } + + // Try to get the physical sector size in case the drive emulates a 512bytes sector size for compatibility with legacy hardware + if (strlen(mysql_real_data_home) > 2 && mysql_real_data_home[1] == ':') { + char drive[16]; + sprintf(drive, "\\\\.\\%c:", mysql_real_data_home[0]); + + STORAGE_PROPERTY_QUERY Query; + STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR Alignment = {0}; + ZeroMemory(&Query, sizeof(Query)); + HANDLE hFile = CreateFileA( drive, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile==INVALID_HANDLE_VALUE) { + spw_print_information("Could not get physical sector size of data drive %s (error %lu opening drive). Using logical sector size, %uB.", + drive, GetLastError(), sectorSize_); + } else { + Query.QueryType = PropertyStandardQuery; + Query.PropertyId = StorageAccessAlignmentProperty; + + DWORD Bytes = 0; + BOOL res = DeviceIoControl( hFile, IOCTL_STORAGE_QUERY_PROPERTY, &Query, sizeof(STORAGE_PROPERTY_QUERY), &Alignment, + sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR), &Bytes, NULL); + if (res == FALSE) { + spw_print_information("Could not get physical sector size of data drive %s (error %lu). Using logical sector size, %uB.", + drive, GetLastError(), sectorSize_); + } else { + spw_print_information("Data drive %s has a physical sector size of %uB and a logical sector size of %uB. Using physical sector size for unbuffered IOs.", + drive, sectorSize_, Alignment.BytesPerPhysicalSector); + sectorSize_ = Alignment.BytesPerPhysicalSector; + } + CloseHandle(hFile); + } + } else { + spw_print_information("Using logical sector size of %u for unbuffered IOs.", sectorSize_); + } + + // Gets the system page size. + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + pageSize_ = systemInfo.dwPageSize; +#else + // On Unix, retrieve the sector size from the system variables. + sectorSize_ = sparrow_disk_sector_size; + + // Gets the system page size. + pageSize_ = getpagesize(); +#endif + + // List file systems. + const time_t start_time = time(0); + randominit(&rnd_, static_cast(start_time), static_cast(start_time) / 2); + filesystems_.append(new Filesystem(mysql_real_data_home)); + if (sparrow_filesystems != 0) { + Str copy(sparrow_filesystems); + char* tmp; + char* token = my_strtok_r(const_cast(copy.c_str()), ",", &tmp); + while (token != 0) { + if (doesFileExist(token)) { + filesystems_.append(new Filesystem(token)); + } else { + spw_print_error("Sparrow will not use file system %s because it does not exist; please check variable sparrow_filesystems", token); + } + token = my_strtok_r(0, ",", &tmp); + } + } + allFilesystems_ = filesystems_; + if (sparrow_coalescing_filesystems != 0) { + Str copy(sparrow_coalescing_filesystems); + char* tmp; + char* token = my_strtok_r(const_cast(copy.c_str()), ",", &tmp); + while (token != 0) { + if (doesFileExist(token)) { + Filesystem* fs = new Filesystem(token); + coalescingFilesystems_.append(fs); + allFilesystems_.append(fs); + } else { + spw_print_error("Sparrow will not use coalescing file system %s because it does not exist; please check variable sparrow_coalescing_filesystems", token); + } + token = my_strtok_r(0, ",", &tmp); + } + } + getFreeDiskSpace(); +} + +// Gets the parent directory of the given path name. 
+// Returns length of parent directory or 0 if no parent. +// STATIC +const char* FileUtil::getParent(const char* path, char* buffer) { + int l = static_cast(strlen(path)); + while (l >= 0 && path[l] == separator_) { + l--; + } + while (l >= 0 && path[l] != separator_) { + l--; + } + if (l <= 0) { + return 0; + } + strncpy(buffer, path, l); + buffer[l] = 0; + return buffer; +} + +// Recursively creates all required directories (if they do not exist) for the given file name. +// STATIC +void FileUtil::createDirectories(const char* path) { + // Try to create parent directory. + char parent[FN_REFLEN]; + if (getParent(path, parent) == 0) { + return; + } + + // Check if directory already exists. + if (doesFileExist(parent)) { + return; + } + + // Recurse. + createDirectories(parent); + my_mkdir(parent, 0777, MYF(0)); +} + +// Recursively deletes all files and directories under the given directory. +// STATIC +void FileUtil::deleteDirectory(const char* path) { + MY_DIR* dir = my_dir(path, MYF(MY_DONT_SORT|MY_WANT_STAT)); + if (dir != 0) { + for (uint i = 0; i < dir->number_off_files; ++i) { + const fileinfo& file = dir->dir_entry[i]; + const char* filename = file.name; + // Ignore "." and "..". + if (filename[0] == '.' + && (filename[1] == 0 || (filename[1] == '.' && filename[2] == 0))) { + continue; + } + char fullFilename[FN_REFLEN]; + snprintf(fullFilename, sizeof(fullFilename), "%s%c%s", path, FileUtil::separator_, filename); + if (MY_S_ISDIR(file.mystat->st_mode)) { + deleteDirectory(fullFilename); + } else { + //spw_print_information("Deleting file %s",fullFilename); + int err = my_delete(fullFilename, MYF(0)); + if ( err != 0 && my_errno() != ENOENT ) { + char errMsg[MYSYS_STRERROR_SIZE]; + my_strerror(errMsg, sizeof(errMsg), my_errno()); + spw_print_information("Failed to delete %s: error code %d (%s)",fullFilename, my_errno(), errMsg); + } + } + } + my_dirend(dir); + rmdir(path); + } +} + +// Recursively scans a directory and returns files or directories matching the given extension. +// The level is 1 to scan only files or directory directly under the directory. +// If parameter forFiles is true, only files are returned. Otherwise only directories are returned. 
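+// Usage sketch (illustrative, hypothetical path and extension):
+//
+//   Files partitionDirs;
+//   FileUtil::scanDirectory("/data/sparrow", ".spw", 2, partitionDirs, false);
+//
+// scans at most two directory levels under /data/sparrow and collects the directories whose
+// names end in ".spw"; passing true as the last argument would collect files instead.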
+// STATIC +void FileUtil::scanDirectory(const char* path, const char* extension, + const uint32_t level, Files& files, const bool forFiles) _THROW_(SparrowException) { + const size_t extLength = strlen(extension); +#ifdef _WIN32 + Str pathx(path); + Str simplePath; + int pos = pathx.length() - 1; + if (pathx.c_str()[pos] == '\\') { + simplePath = Str(pathx.c_str(), pos); + pathx += Str("*"); + } else { + simplePath = pathx; + pathx += Str("\\*"); + } + path = pathx.c_str(); + WIN32_FIND_DATA data; + DirGuard guard(path, &data); + for (;;) { + const char* name = data.cFileName; + if (strcmp(name, ".") != 0 && strcmp(name, "..") != 0) { + char fullName[8192]; + snprintf(fullName, sizeof(fullName), "%s\\%s", simplePath.c_str(), name); + if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + if (!forFiles) { + const size_t length = strlen(name); + if (length > extLength && strcmp(name + length - extLength, extension) == 0) { + files.append(Str(fullName)); + } + } + if (level > 1) { + scanDirectory(fullName, extension, level - 1, files, forFiles); + } + } else if (forFiles) { + const size_t length = strlen(name); + if (length > extLength && strcmp(name + length - extLength, extension) == 0) { + files.append(Str(fullName)); + } + } + } + if (FindNextFile(guard.get(), &data) == 0) { + if (GetLastError() == ERROR_NO_MORE_FILES) { + break; + } + throw SparrowException::create(true, "Cannot scan directory %s", path); + } + } +#else + Str pathx(path); + int pos = pathx.length() - 1; + if (pathx.c_str()[pos] == '/') { + pathx = Str(pathx.c_str(), pos); + } + path = pathx.c_str(); + DirGuard guard(path); + DIR* dir = guard.get(); + //char direntBuf[sizeof(struct dirent) + _POSIX_PATH_MAX + 100]; + struct dirent* entry = 0; + for (;;) { + errno = 0; + entry = readdir(dir); + ///int result = readdir_r(dir, reinterpret_cast(direntBuf), &entry); + // if (result != 0) { + // throw SparrowException::create(true, "Cannot read directory %s", path); + // } + if (entry == 0) { + if (errno != 0) { + throw SparrowException::create(true, "Cannot read directory %s", path); + } + break; + } + const char* name = entry->d_name; + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } + char fullName[8192]; + snprintf(fullName, sizeof(fullName), "%s/%s", path, name); + MY_STAT stat; + if (my_stat(fullName, &stat, MYF(0)) == 0) { + throw SparrowException::create(true, "Cannot get stats of %s", fullName); + } + if (S_ISDIR(stat.st_mode)) { + if (!forFiles) { + const size_t length = strlen(name); + if (length > extLength && strcmp(name + length - extLength, extension) == 0) { + files.append(Str(fullName)); + } + } + if (level > 1) { + scanDirectory(fullName, extension, level - 1, files, forFiles); + } + } else if (forFiles) { + const size_t length = strlen(name); + if (length > extLength && strcmp(name + length - extLength, extension) == 0) { + files.append(Str(fullName)); + } + } + } +#endif +} + + +// Recursively scans a directory and returns files or directories matching the given extension. +// The level is 1 to scan only files or directory directly under the directory. +// If parameter forFiles is true, only files are returned. Otherwise only directories are returned. 
+// STATIC +void FileUtil::dbg_dump_content(const char* path) _THROW_(SparrowException) { + + spw_print_information("[DBG] dumping content of %s",path); +#ifdef _WIN32 + Str pathx(path); + Str simplePath; + int pos = pathx.length() - 1; + if (pathx.c_str()[pos] == '\\') { + simplePath = Str(pathx.c_str(), pos); + pathx += Str("*"); + } else { + simplePath = pathx; + pathx += Str("\\*"); + } + path = pathx.c_str(); + WIN32_FIND_DATA data; + DirGuard guard(path, &data); + for (;;) { + const char* name = data.cFileName; + if (strcmp(name, ".") != 0 && strcmp(name, "..") != 0) { + char fullName[8192]; + snprintf(fullName, sizeof(fullName), "%s\\%s", simplePath.c_str(), name); + spw_print_information("[DBG] %s",fullName); + if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + dbg_dump_content(fullName); + } + } + if (FindNextFile(guard.get(), &data) == 0) { + if (GetLastError() == ERROR_NO_MORE_FILES) { + break; + } + throw SparrowException::create(true, "Cannot scan directory %s", path); + } + } +#else + Str pathx(path); + int pos = pathx.length() - 1; + if (pathx.c_str()[pos] == '/') { + pathx = Str(pathx.c_str(), pos); + } + path = pathx.c_str(); + DirGuard guard(path); + DIR* dir = guard.get(); + //char direntBuf[sizeof(struct dirent) + _POSIX_PATH_MAX + 100]; + struct dirent* entry = 0; + for (;;) { + errno = 0; + entry = readdir(dir); + // int result = readdir_r(dir, reinterpret_cast(direntBuf), &entry); + // if (result != 0) { + // throw SparrowException::create(true, "Cannot read directory %s", path); + // } + if (entry == 0) { + if (errno != 0) { + throw SparrowException::create(true, "Cannot read directory %s", path); + } + break; + } + const char* name = entry->d_name; + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } + char fullName[8192]; + snprintf(fullName, sizeof(fullName), "%s/%s", path, name); + MY_STAT stat; + if (my_stat(fullName, &stat, MYF(0)) == 0) { + throw SparrowException::create(true, "Cannot get stats of %s", fullName); + } + spw_print_information("[DBG] %s, rights %u, uid %u, gid %u, size %ld",fullName, + stat.st_mode, stat.st_uid, stat.st_gid, stat.st_size); + if (S_ISDIR(stat.st_mode)) { + dbg_dump_content(fullName); + } + } +#endif +} + + +// Checks if a file/directory exists. +// STATIC +bool FileUtil::doesFileExist(const char* path) { + MY_STAT stat; + return (my_stat(path, &stat, MYF(0)) != 0); +} + +// Gets the size of a file. 
+// STATIC +uint64_t FileUtil::getFileSize(File file) _THROW_(SparrowException) { +#ifdef _WIN32 + LARGE_INTEGER size; + if (file != -1 && GetFileSizeEx(my_get_osfhandle(file), &size)) { + return static_cast(size.QuadPart); + } +#else + struct stat s; + if (file != -1 && fstat(file, &s) == 0) { + return static_cast(s.st_size); + } +#endif + throw SparrowException::create(true, "Cannot get file size"); +} + +// STATIC +void FileUtil::rename(const char* from, const char* to) _THROW_(SparrowException) { +#ifdef _WIN32 + do { + if (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH)) + break; + if (GetLastError() == ERROR_SHARING_VIOLATION) { + my_sleep(100000); // 100ms + } else { + throw SparrowException::create(true, "Cannot rename file or directory \"%s\" to \"%s\"", from, to); + } + } while (true); +#else + do { + if (::rename(from, to) == 0) + break; + if (errno == EBUSY) { + my_sleep(100000); // 100ms + } else { + throw SparrowException::create(true, "Cannot rename file or directory \"%s\" to \"%s\"", from, to); + } + } while (true); +#endif +} + +// STATIC +Str FileUtil::getDatabaseName(const char* path) { + const int len = path == 0 ? 0 : static_cast(strlen(path)); + if (len == 0) { + return Str(); + } + const char* t = path + len - 1; + while (t != path && *t != '/' && *t != '\\') { + t--; + } + if (*t != '/' && *t != '\\') { + return Str(); + } + const char* saved = t; + t--; + while (t != path && *t != '/' && *t != '\\') { + t--; + } + if (*t != '/' && *t != '\\') { + return Str(); + } + t++; + return Str(t, static_cast(saved - t)); +} + +// STATIC +Str FileUtil::getTableName(const char* path) { + const int len = path == 0 ? 0 : static_cast(strlen(path)); + if (len == 0) { + return Str(); + } + const char* dot = 0; + const char* t = path + len - 1; + while (t != path && *t != '/' && *t != '\\') { + if (*t == '.') { + dot = t; + } + t--; + } + if (*t != '/' && *t != '\\') { + return Str(); + } + t++; + if (dot == 0) { + return Str(t); + } else { + return Str(t, static_cast(dot - t)); + } +} + +// Gets the free disk space. +// STATIC +uint64_t FileUtil::getFreeDiskSpace() { + SPARROW_ENTER("FileUtil::getFreeDiskSpace"); + const uint64_t free = updateStats(filesystems_, filesystemIds_, false); + const uint64_t coalescingFree = updateStats(coalescingFilesystems_, coalescingFilesystemIds_, true); + Atomic::set64(&SparrowStatus::get().freeDiskSpace_, free + coalescingFree); + return free; +} + +// STATIC +uint64_t FileUtil::updateStats(Filesystems& filesystems, FilesystemIds& filesystemIds, const bool coalescing) { + uint64_t total = 0; + uint64_t minFree = ULLONG_MAX; + uint64_t maxFree = 0; + const uint32_t n = filesystems.length(); + const uint64_t margin = Purge::getSecurityMargin(); + for (uint32_t i = 0; i < n; ++i) { + const uint64_t f = filesystems[i]->computeStats(); + total += f; + const uint64_t af = f > margin ? f - margin : 0; + if (af != 0) { + minFree = std::min(minFree, af); + maxFree = std::max(maxFree, af); + } + } + + double unit = minFree; + if ( (maxFree/minFree) > 100 ) { + unit = maxFree/100.0; + //spw_print_information("Updating FS stats. Free space: min %llu (%lluMB), max %llu (%lluMB), total %llu (%lluMB). Limiting computing unit to %f", + // static_cast(minFree), static_cast(minFree/(1024*1024)), static_cast(maxFree), static_cast(maxFree/(1024*1024)), + // static_cast(total), static_cast(total/(1024*1024)), unit ); + } + + // Prepare array for weighted round robin. 
+ const uint32_t factor = 5; + FilesystemIds tmp(minFree == ULLONG_MAX ? n : static_cast((factor * total) / unit)); + for (uint32_t i = 0; i < n; ++i) { + if (minFree != ULLONG_MAX) { + const Filesystem& fs = *filesystems[i]; + const uint64_t f = fs.getFree(); + const uint64_t af = f > margin ? f - margin : 0; + if (af != 0) { + const uint32_t count = static_cast((factor * af) / unit); + for (uint32_t j = 0; j < count; ++j) { + tmp.append(i); + } + } + // We want an empty result if there is no space left on coalescing file systems. + } else if (!coalescing) { + tmp.append(i); + } + } + FilesystemIds ids(tmp.capacity()); + while (!tmp.isEmpty()) { + const uint32_t i = static_cast(my_rnd(&rnd_) * tmp.length()); + ids.append(tmp[i]); + tmp.removeAt(i); + } + { + Guard guard(lock_); + filesystemIds = ids; + } + return total; +} + +void FileUtil::getDiskStats(uint64_t& totalFree, uint64_t& totalUsed, uint64_t& totalSize) +{ + SPARROW_ENTER("FileUtil::getDiskStats"); + totalSize = 0; + totalUsed = 0; + totalFree = 0; + const uint32_t n = filesystems_.length(); + const uint32_t na = allFilesystems_.length(); + for (uint32_t i = 0; i < na; ++i) + { + const bool coalescing = i >= n; + const Filesystem& fs = coalescing ? *coalescingFilesystems_[i - n] : *filesystems_[i]; + const uint64_t u = fs.getUsed(); + totalUsed += u; + const uint64_t s = fs.getSize(); + totalSize += s; + const uint64_t f = fs.getFree(); + totalFree += f; + } +} + +// Chooses the file system to write to. +// Use a weighted round robin, where weights are computed using file system free space. +// STATIC +uint32_t FileUtil::chooseFilesystem(const bool coalescing) { + SPARROW_ENTER("FileUtil::chooseFilesystem"); + Guard guard(lock_); + if (coalescing && !coalescingFilesystemIds_.isEmpty()) { + static uint32_t coalescingId = 0; + return COALESCING_FILESYSTEM + coalescingFilesystemIds_[coalescingId++ % coalescingFilesystemIds_.length()]; + } else { + static uint32_t id = 0; + return filesystemIds_[id++ % filesystemIds_.length()]; + } +} + +// STATIC +const char* FileUtil::getFilesystemPath(const uint32_t filesystem) { + if (filesystem >= COALESCING_FILESYSTEM) { + const uint32_t length = coalescingFilesystems_.length(); + const uint32_t i = filesystem - COALESCING_FILESYSTEM; + return length == 0 ? filesystems_[0]->getPath().c_str() : coalescingFilesystems_[i >= length ? length - 1 : i]->getPath().c_str(); + } else { + const uint32_t length = filesystems_.length(); + return filesystems_[filesystem >= length ? length - 1 : filesystem]->getPath().c_str(); + } +} + +// Report status of file systems. +// STATIC +void FileUtil::report(PrintBuffer& buffer) _THROW_(SparrowException) { + SPARROW_ENTER("FileUtil::report"); + buffer << "\nFilesystems:\n\n"; + const char* h[] = { "Path", "Size", "Used", "Free" }; + SYSvector headers(sizeof(h) / sizeof(h[0])); + for (uint32_t i = 0; i < headers.capacity(); ++i) { + headers.append(Str(h[i])); + } + char tmp[1024]; + uint64_t totalSize = 0; + uint64_t totalUsed = 0; + uint64_t totalFree = 0; + SYSslist strings; + const uint32_t n = filesystems_.length(); + const uint32_t na = allFilesystems_.length(); + for (uint32_t i = 0; i < na; ++i) { + const bool coalescing = i >= n; + const Filesystem& fs = coalescing ? 
*coalescingFilesystems_[i - n] : *filesystems_[i];
+    Str name(fs.getPath());
+    if (coalescing) {
+      name += Str(" (coalescing)");
+    }
+    strings.append(name);
+    const uint64_t s = fs.getSize();
+    strings.append(Str::fromSize(s));
+    totalSize += s;
+    const uint64_t u = fs.getUsed();
+    snprintf(tmp, sizeof(tmp), "%s (%.1f%%)", Str::fromSize(u).c_str(), (100.0 * u) / s);
+    strings.append(Str(tmp));
+    totalUsed += u;
+    const uint64_t f = fs.getFree();
+    snprintf(tmp, sizeof(tmp), "%s (%.1f%%)", Str::fromSize(f).c_str(), (100.0 * f) / s);
+    strings.append(Str(tmp));
+    totalFree += f;
+  }
+  strings.append(Str("TOTAL"));
+  strings.append(Str::fromSize(totalSize));
+  snprintf(tmp, sizeof(tmp), "%s (%.1f%%)", Str::fromSize(totalUsed).c_str(), (100.0 * totalUsed) / totalSize);
+  strings.append(Str(tmp));
+  snprintf(tmp, sizeof(tmp), "%s (%.1f%%)", Str::fromSize(totalFree).c_str(), (100.0 * totalFree) / totalSize);
+  strings.append(Str(tmp));
+  InternalApi::printGrid(buffer, headers, strings, 4);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// Filesystem
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+uint64_t Filesystem::computeStats() {
+  SPARROW_ENTER("Filesystem::computeStats");
+  uint64_t s = 0;
+  uint64_t u = 0;
+  uint64_t f = 0;
+#ifdef _WIN32
+  DWORD sectorsPerCluster, bytesPerSector, freeClusters, totalClusters;
+  if (GetDiskFreeSpace(getPath().c_str(), &sectorsPerCluster, &bytesPerSector,
+    &freeClusters, &totalClusters) != 0) {
+    s = static_cast<uint64_t>(totalClusters) * sectorsPerCluster * bytesPerSector;
+    f = static_cast<uint64_t>(freeClusters) * sectorsPerCluster * bytesPerSector;
+  }
+#elif defined(__MACH__)
+  struct statvfs vfs;
+  if (statvfs(getPath().c_str(), &vfs) == 0) {
+    s = static_cast<uint64_t>(vfs.f_frsize) * vfs.f_blocks;
+    f = static_cast<uint64_t>(vfs.f_frsize) * vfs.f_bavail;
+  }
+#else
+  struct statvfs64 vfs;
+  if (statvfs64(getPath().c_str(), &vfs) == 0) {
+    s = static_cast<uint64_t>(vfs.f_frsize) * vfs.f_blocks;
+    f = static_cast<uint64_t>(vfs.f_frsize) * vfs.f_bavail;
+  }
+#endif
+  u = s > f ? s - f : 0;
+  size_ = s;
+  used_ = u;
+  Atomic::set64(&free_, f);
+#ifndef NDEBUG
+  const Str ssize(Str::fromSize(s));
+  const Str sused(Str::fromSize(u));
+  const Str sfree(Str::fromSize(f));
+  DBUG_PRINT("sparrow_purge", ("File system %s: %s size, %s used, %s free", getPath().c_str(), ssize.c_str(), sused.c_str(), sfree.c_str()));
+#endif
+  return f;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// ReferencedBlocks
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+volatile uint32_t ReferencedBlocks::lockedBlocks_ = 0;
+volatile uint32_t ReferencedBlocks::maxLockedBlocks_ = 0;
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileHeader
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// File format history:
+// 1 Initial format: header, tree, binary, records.
+// 2 Format suitable for coalescing: records, binary, tree, header.
+// 3 Extend header with tree information.
+const uint8_t FileHeader::currentFileFormat_ = 3;
+
+// Header sizes for each format.
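+// Entry i is the header size of file format i, so code reading a partition written by an older
+// engine can call FileHeader::size(format) to know how many bytes to deserialize (the stream
+// operator for FileHeader uses it when reading version-1 files). Illustrative values only:
+//
+//   FileHeader::size(1);  // 57 bytes, legacy layout
+//   FileHeader::size();   // defaults to FileHeader::currentFileFormat_
+//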
+const uint32_t FileHeader::sizes_[] = { 0, 57, 89, FileHeader::computeSize() }; + +// STATIC +uint32_t FileHeader::computeSize() { + uint8_t bytes[1024]; + ByteBuffer buffer(bytes, sizeof(bytes)); + buffer << FileHeader(); + return static_cast(buffer.position()); +} + +FileHeader::FileHeader() { +} + +FileHeader::FileHeader(const uint64_t binSize, const uint32_t treeSize, const bool treeComplete, const uint32_t nodeSize, const uint32_t recordSize, + const uint32_t records, const uint32_t index, const uint64_t start, const uint64_t end) + : format_(FileHeader::currentFileFormat_), recordSize_(recordSize), records_(records), + treeComplete_(treeComplete), nodeSize_(0), nodes_(0), index_(index), start_(start), end_(end), treeOrder_(0) { + // Here is the file layout: + // +--------+---------+------+-------------+---------+--------+ + // | Format | Records | Tree | Binary Data | Padding | Header | + // +--------+---------+------+-------------+---------+--------+ + // Note padding is necessary because the whole file must have a size adjusted to the sector size, + // and the header must be at the end of the file. + uint64_t offset = 4; // Format is coded on 4 bytes. + recordsSection_ = FileSection(offset, static_cast(records) * recordSize); + offset += recordsSection_.getSize(); + treeSection_ = FileSection(offset, treeSize); + offset += treeSize; + binSection_ = FileSection(offset, binSize); + offset += binSize; + const uint64_t totalSize = offset + FileHeader::size(); + totalSize_ = FileUtil::adjustSizeToSectorSize(totalSize); // Adjust total size on sector size. + genTime_ = static_cast(std::time(nullptr)); + initialize(nodeSize); +} + +void FileHeader::initialize(const uint32_t nodeSize /* = 0 */) { + if (index_ != DATA_FILE) { + if (nodeSize > 0) { + nodeSize_ = nodeSize; + nodes_ = static_cast(treeSection_.getSize() / nodeSize_); + assert(treeSection_.getSize() % nodeSize_ == 0); + } + treeOrder_ = &TreeOrder::get(nodes_); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DataFileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint32_t DataFileHeader::size_ = DataFileHeader::computeSize(); + +// STATIC +uint32_t DataFileHeader::computeSize() { + uint8_t bytes[1024]; + ByteBuffer buffer(bytes, sizeof(bytes)); + buffer << DataFileHeader(); + return static_cast(buffer.position()); +} + +DataFileHeader::DataFileHeader() { +} + +DataFileHeader::DataFileHeader(const uint32_t recordSize, const uint64_t records, const uint64_t stringOffset, const uint64_t stringSize, const uint64_t start, const uint64_t end) + : genTime_(static_cast(std::time(nullptr))), recordsSection_(size_, records * recordSize), + recordSize_(recordSize), records_(records), stringsSection_(stringOffset, stringSize), start_(start), end_(end) { +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IndexFileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint32_t IndexFileHeader::size_ = IndexFileHeader::computeSize(); + +// STATIC +uint32_t IndexFileHeader::computeSize() { + uint8_t bytes[1024]; + ByteBuffer buffer(bytes, sizeof(bytes)); + buffer << IndexFileHeader(); + return static_cast(buffer.position()); +} + +IndexFileHeader::IndexFileHeader() { +} + +IndexFileHeader::IndexFileHeader(const uint32_t index, const uint32_t recordSize, const uint64_t records, + 
const uint32_t nodeSize, const uint64_t nodes, const uint64_t start, const uint64_t end) + : genTime_(static_cast(std::time(nullptr))), index_(index), recordsSection_(size_, records * recordSize), + recordSize_(recordSize), records_(records), treeSection_(size_ + records * recordSize, nodes * nodeSize), + nodeSize_(nodeSize), nodes_(nodes), start_(start), end_(end) { + treeOrder_ = &TreeOrder::get(static_cast(nodes_)); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +PartitionReader::PartitionReader(const PersistentPartition& partition, const uint32_t fileId, const BlockCacheHint& hint) _THROW_(SparrowException) + : FileReader(partition, fileId, this), hint_(hint), columnAlterSerial_(partition.getColumnAlterSerial()) { + header_ = partition.readHeader(fileId, *this); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReaders +////////////////////////////////////////////////////////////////////////////////////////////////////// + +PartitionReader* PartitionReaders::get(const uint32_t n, PersistentPartition& partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException) { + assert(index != STRING_FILE); + const uint32_t level = hint.getLevel(); + assert(level == 1 || level == 2); + const uint32_t i = n * 8 + (index == DATA_FILE ? 0 : 4) + (isString ? 0 : 2) + (level - 1); + PartitionReader* reader = PartitionReadersBase::operator[](i); + const uint32_t fileId = partition.getFileId(index, isString); + if (reader == 0 || reader->getFileId() != fileId || reader->getBlockHint() != hint) { + delete reader; + PartitionReadersBase::operator[](i) = 0; + reader = partition.createReader(index, isString, hint); + PartitionReadersBase::operator[](i) = reader; + } + return reader; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +const char* FileWriter::SUFFIX = ".tmp"; + +FileWriter::~FileWriter() { + // Releases and closes the file (clear flag is true). + IO::flush(entry_->getValue().getFile(), filename_); + const FileMode mode = getMode(); + FileCache::get().release(entry_, 0, true, true); + if (mode == FILE_MODE_CREATE) { + if (failed_) { + // Drop temporary file. + my_delete(filename_, MYF(0)); + } else { + // Rename the new file. + char newname[FN_REFLEN]; + strcpy(newname, filename_); + newname[strlen(newname) - strlen(SUFFIX)] = 0; + try { + FileUtil::rename(filename_, newname); + } catch(const SparrowException& e) { + e.toLog(); + } + } + } +} + +// Absolute seek within the file. +// The parameter length is the number of bytes the caller expects to modify after offset. +void FileWriter::seek(const uint64_t offset, const uint64_t length) _THROW_(SparrowException) { + try { + if (start_ != ULLONG_MAX && offset >= start_ && offset < start_ + limit()) { + position(offset - start_); + } else { + const uint64_t newStart = offset - (offset % sparrow_cache_block_size); + const uint64_t newLength = offset + length - newStart; + const uint64_t remaining = newStart <= size_ ? 
size_ - newStart : 0; + const uint64_t size = std::min(limit(), std::min(newLength, remaining)); + if (size > 0) { + uint8_t* data = getData(); + if (cacheHint_ == 0) { + entry_->getValue().read(newStart, data, static_cast(FileUtil::adjustSizeToSectorSize(size))); + } else { + // Caching is enabled: get data from the cache. + uint64_t end = newStart + size; + const uint32_t modulo = end % sparrow_cache_block_size; + if (modulo != 0) { + end += sparrow_cache_block_size - modulo; + } + const uint32_t level = cacheHint_->getLevel(); + for (uint64_t ioffset = newStart; ioffset < end; ioffset += sparrow_cache_block_size) { + BlockCacheEntry* entry = BlockCache::get().acquire(level, + FileOffset(cacheHint_->getPartitionFile(), ioffset), BlockCacheHint::smallForward3_, true, false); + assert(entry->isValid()); + FileBlock& block = entry->getValue(); + memcpy(data + ioffset - newStart, block.getData(), block.getLength()); + BlockCache::get().release(entry, entry->getLevel(), false, false); + } + } + } + start_ = newStart; + position(offset - newStart); + } + mark_ = position(); + length_ = length; + } catch(const SparrowException& e) { + failed_ = true; + throw; + } +} + +// Write buffered data to the file. +void FileWriter::write() _THROW_(SparrowException) { + try { + assert(position() >= mark_); + if (mark_ == position()) { + return; + } + uint64_t start = FileUtil::adjustPosToSectorSize(start_ + mark_); + uint64_t offset = start - start_; + uint64_t toWrite = position() - offset; + if (start + toWrite < size_) { + toWrite = FileUtil::adjustSizeToSectorSize(toWrite); + } + const uint32_t written = entry_->getValue().write(start, getData() + offset, static_cast(toWrite)); + size_ = std::max(start + written, size_); + if (cacheHint_ != 0) { + // Caching is enabled: update cache with written data. + start -= start % sparrow_cache_block_size; + uint64_t end = start_ + position(); + const uint32_t modulo = end % sparrow_cache_block_size; + if (modulo != 0) { + end += sparrow_cache_block_size - modulo; + } + BlockCacheEntriesGuard cacheEntriesGuard; + BlockCacheEntries& cacheEntries = cacheEntriesGuard.get(); + const uint32_t n = static_cast((end - start) / sparrow_cache_block_size); + FileOffset* ids = static_cast(IOContext::getTempBuffer3(n * sizeof(FileOffset))); + const PartitionFile& file = cacheHint_->getPartitionFile(); + offset = start; + for (uint32_t i = 0; i < n; ++i, offset += sparrow_cache_block_size) { + ids[i] = FileOffset(file, offset); + } + BlockCache::get().acquireMultiple(cacheHint_->getLevel(), ids, n, cacheEntries); + offset = start; + for (uint32_t j=0; j(std::min(static_cast(sparrow_cache_block_size), size_ - offset)); + const FileBlock block(getData() + offset - start_, length); + entry->getValue().replace(block); + entry->setValid(true); + offset += sparrow_cache_block_size; + } + } + } catch(const SparrowException& e) { + failed_ = true; + throw; + } +} + +} diff --git a/storage/sparrow/engine/fileutil.h b/storage/sparrow/engine/fileutil.h new file mode 100644 index 000000000000..e48a7102030b --- /dev/null +++ b/storage/sparrow/engine/fileutil.h @@ -0,0 +1,1188 @@ +/* + File utilities. +*/ + +#ifndef _engine_fileutil_h_ +#define _engine_fileutil_h_ + +#include "../handler/plugin.h" // For configuration parameters. 
+#include "cache.h" +#include "io.h" + +#ifdef _WIN32 +#include +#ifndef rmdir +#define rmdir _rmdir +#endif +#else +#include +#include +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Filesystem +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Filesystem { +private: + + Str path_; + uint64_t size_; + uint64_t used_; + volatile mutable uint64_t free_; + +public: + + Filesystem() : free_(0) { + } + Filesystem(const char* path) : path_(path), size_(0), used_(0), free_(0) { + } + const Str& getPath() const { + return path_; + } + uint64_t getSize() const { + return size_; + } + uint64_t getUsed() const { + return used_; + } + uint64_t getFree() const { + return Atomic::get64(&free_); + } + uint64_t computeStats(); +}; + +typedef SYSpVector Filesystems; +typedef SYSvector FilesystemIds; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DirGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DirGuard { +#ifdef _WIN32 +private: + HANDLE h_; +public: + DirGuard(const char* path, WIN32_FIND_DATA* data) _THROW_(SparrowException) { + h_ = FindFirstFile(path, data); + if (h_ == INVALID_HANDLE_VALUE) { + throw SparrowException::create(true, "Cannot open directory %s", path); + } + } + ~DirGuard() { + FindClose(h_); + } + HANDLE get() { + return h_; + } +#else +private: + DIR* dir_; +public: + DirGuard(const char* path) _THROW_(SparrowException) { + dir_ = opendir(path); + if (dir_ == 0) { + throw SparrowException::create(true, "Cannot open directory %s", path); + } + } + ~DirGuard() { + closedir(dir_); + } + DIR* get() { + return dir_; + } +#endif +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileUtil +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSslist Files; + +#define COALESCING_FILESYSTEM 1000 + +class FileUtil { +private: + + static uint32_t sectorSize_; + static uint32_t pageSize_; + static const char separator_; + static Filesystems filesystems_; + static FilesystemIds filesystemIds_; + static Filesystems coalescingFilesystems_; + static FilesystemIds coalescingFilesystemIds_; + static Filesystems allFilesystems_; + static Lock lock_; + static struct rand_struct rnd_; + +private: + + static uint64_t updateStats(Filesystems& filesystems, FilesystemIds& filesystemIds, const bool coalescing); + +public: + + static void initialize() _THROW_(SparrowException); + + static void createDirectories(const char* path); + + static void deleteDirectory(const char* path); + + static void scanDirectory(const char* path, const char* extension, + const uint32_t level, Files& files, const bool forFiles) _THROW_(SparrowException); + + static const char* getParent(const char* path, char* buffer); + + static bool doesFileExist(const char* path); + + static uint64_t getFileSize(File file) _THROW_(SparrowException); + + static void rename(const char* from, const char* to) _THROW_(SparrowException); + + static Str getDatabaseName(const char* path); + + static Str getTableName(const char* path); + + // Gets the sector size of the MySQL data directory. 
+ static uint32_t getSectorSize() { + return sectorSize_; + } + + static void getDiskStats(uint64_t& totalFree, uint64_t& totalUsed, uint64_t& totalSize); + + // Gets the system page size. + static uint32_t getPageSize() { + return pageSize_; + } + + // Adjusts the given size to a multiple of sector size. + static uint64_t adjustSizeToSectorSize(const uint64_t size) { + const uint32_t modulo = static_cast(size % sectorSize_); + if (modulo == 0) { + return size; + } else { + return size + sectorSize_ - modulo; + } + } + + // Adjusts the given position to a multiple of sector size. + static uint64_t adjustPosToSectorSize(const uint64_t pos) { + return pos - (pos % sectorSize_); + } + + // Gets the free disk space. + static uint64_t getFreeDiskSpace(); + + // Chooses the file system to write to, optionally requesting the coalescing file system. + static uint32_t chooseFilesystem(const bool coalescing); + + // Get the path of a filesystem. + static const char* getFilesystemPath(const uint32_t filesystem); + + // Get file systems, optionally including coalescing file system. + static const Filesystems& getFilesystems(const bool withCoalescing) { + return withCoalescing ? allFilesystems_ : filesystems_; + } + + // Report status of file systems. + static void report(PrintBuffer& buffer) _THROW_(SparrowException); + + static void dbg_dump_content(const char* path) _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ReferencedBlock +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ReferencedBlock : public SYSidlink { +private: + + const uint64_t offset_; + BlockCacheEntry* entry_; + +private: + + ReferencedBlock(const ReferencedBlock&); + ReferencedBlock& operator = (const ReferencedBlock&); + +public: + + ReferencedBlock(const uint64_t offset, BlockCacheEntry* entry = 0) : offset_(offset), entry_(entry) { + } + + uint64_t getOffset() const { + return offset_; + } + + BlockCacheEntry* getEntry() { + return entry_; + } + + bool operator == (const ReferencedBlock& right) const { + return offset_ == right.offset_; + } + + uint32_t hash() const { + return 31 + static_cast(offset_ ^ (offset_ >> 32)); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ReferencedBlocks +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ReferencedBlocks : public SYSpHash { +public: + + static volatile uint32_t lockedBlocks_; + static volatile uint32_t maxLockedBlocks_; + +private: + + SYSidlist lru_; + +public: + + ReferencedBlocks() : SYSpHash(16) { + } + + void reset(bool clear=false) { + while (!lru_.isEmpty()) { + ReferencedBlock* block = lru_.removeFirst(); + BlockCacheEntry* entry = block->getEntry(); + BlockCache::get().release(entry, entry->getLevel(), clear, true); + Atomic::dec32(&lockedBlocks_); + remove(block); + delete block; + } + } + + ~ReferencedBlocks() { + reset(); + } + + BlockCacheEntry* get(const FileOffset& offset, const BlockCacheHint& hint) _THROW_(SparrowException) { + assert(offset.getOffset() % sparrow_cache_block_size == 0); + const ReferencedBlock key(offset.getOffset()); + ReferencedBlock* block = find(&key); + if (block == 0) { + BlockCacheEntry* entry = 0; + while (!lru_.isEmpty() && lockedBlocks_ > maxLockedBlocks_) { + ReferencedBlock* block_t = lru_.removeFirst(); + remove(block_t); + BlockCacheEntry* old = 
block_t->getEntry(); + delete block_t; + if (entry == 0) { + entry = BlockCache::get().releaseAndAcquire(old, hint.getLevel(), offset, hint); + } else { + BlockCache::get().release(old, old->getLevel(), false, true); + Atomic::dec32(&lockedBlocks_); + } + } + if (entry == 0) { + entry = BlockCache::get().acquire(hint.getLevel(), offset, hint, true, true); + Atomic::inc32(&lockedBlocks_); + } + block = new ReferencedBlock(offset.getOffset(), entry); + insert(block); + lru_.append(block); + } else { + // Move block to the end of the LRU. + lru_.remove(block); + lru_.append(block); + } + BlockCacheEntry* entry = block->getEntry(); + if (!entry->isValid()) { + char name[FN_REFLEN]; + throw SparrowException::create(false, "Cannot read from file %s at offset %llu", entry->getId().getFileName(name), static_cast(key.getOffset())); + } + return entry; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Reads data from a file. +// If the file is a partition file, the reader will use the file block cache. + +class FileReader : public ByteBuffer, ByteBufferOverflow { +protected: + + ReferencedBlocks blocks_; + const uint32_t size_; + const ReadCacheHint* cacheHint_; + FileCacheEntry* entry_; // Used with non-cacheable file. + FileOffset fileOffset_; + +protected: + + void read(const uint64_t offset) _THROW_(SparrowException) { + if (entry_ == 0) { + const uint64_t adjustedOffset = offset - (offset % sparrow_cache_block_size); + fileOffset_.setOffset(adjustedOffset); + BlockCacheEntry* entry = blocks_.get(fileOffset_, cacheHint_->getBlockHint()); + if (!entry->isValid()) { + char name[FN_REFLEN]; + throw SparrowException::create(false, "Cannot read from file %s at offset %llu", entry->getId().getFileName(name), static_cast(offset)); + } + const FileBlock& block = entry->getValue(); + *static_cast(this) = ByteBuffer(block.getData(), block.getLength(), this, getVersion()); + + // Adjust position in buffer. + position(static_cast(offset % sparrow_cache_block_size)); + } else { + // Do not use cache. + fileOffset_.setOffset(offset); + limit(entry_->getValue().read(offset, data_, size_)); + position(0); + } + } + +public: + + FileReader(const PersistentPartition& partition, const uint32_t fileId, const ReadCacheHint* cacheHint) + : ByteBuffer(0, 0, this), size_(static_cast(limit())), cacheHint_(cacheHint), entry_(0), fileOffset_(partition, fileId, 0) { + } + + FileReader(const char* name) _THROW_(SparrowException) + : ByteBuffer(IOContext::getBuffer(sparrow_medium_read_block_size), this), size_(static_cast(limit())), cacheHint_(0), entry_(0) { + try { + entry_ = FileCache::get().acquire(0, FileId(name, FILE_TYPE_MISC, FILE_MODE_READ), 0, true, true); + if (!entry_->isValid()) { + FileCache::get().release(entry_, 0, true, true); + throw SparrowException::create(false, "Cannot open file %s for reading", name); + } + read(0); + } catch(const SparrowException&) { + release(); + throw; + } + } + + virtual ~FileReader() { + if (entry_ != 0) { + FileCache::get().release(entry_, 0, true, true); + } + } + + const char* getFileName(char* name) const _THROW_(SparrowException) { + return fileOffset_.getFileName(name); + } + + // When guard goes out of scope. + void release() { + if (ReferencedBlocks::lockedBlocks_ > ReferencedBlocks::maxLockedBlocks_) { + blocks_.reset(); + } + } + + // Releases cached file handle and data. 
Resets offset to 0. + void close(bool clear) { + position(0); + fileOffset_.setOffset(0); + if (entry_ != 0) { + FileCache::get().release(entry_, 0, true, true); + entry_ = 0; + } + blocks_.reset(clear); + } + + void open() { + + } + + // Absolute seek within the file. + uint64_t seek(const uint64_t offset) { + const uint64_t start = fileOffset_.getOffset(); + if (entry_ == 0) { + // Check if new offset inside the current block, if any. + if (!blocks_.isEmpty() && offset >= start && offset < start + limit()) { + // Yes: adjust position in buffer. + position(offset - start); + } else { + read(offset); + } + } else { + // Do not use cache. + if (offset >= start && offset < start + limit()) { + position(offset - start); + } else { + read(offset); + } + } + return offset; + } + + void overflow() override _THROW_(SparrowException) { + seek(fileOffset_.getOffset() + position()); + } + + bool end() const override { + return false; // Let actual EOF generate an exception. + } + + uint64_t getFileOffset() const { + return fileOffset_.getOffset() + position(); + } + + uint64_t getFileSize() const { + FileId key; + fileOffset_.getFileName(key.getName()); + FileCacheGuard guard(FileCache::get(), 0, key, 0, false); + const FileHandle& handle = guard.get()->getValue(); + return handle.getSize(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This class is used to write data to a new file (.tmp renamed at the end) or to update/extend an existing file. +// It has the following limited behavior: it writes data between the last seek and the current position +// when write() is called. +// It can optionnally update the file block cache with written data. + +class FileWriter : public ByteBuffer, ByteBufferOverflow { +private: + + static const char* SUFFIX; + + FileCacheEntry* entry_; + char filename_[FN_REFLEN]; + uint64_t size_; // File size. + uint64_t start_; // Adjusted offset, corresponding to position 0 in buffer. + uint64_t mark_; + uint64_t length_; + + const WriteCacheHint* cacheHint_; + + bool failed_; + +private: + + FileMode getMode() const { + return entry_->getId().getMode(); + } + +public: + + FileWriter(const char* name, const FileType type, const FileMode mode, const WriteCacheHint* cacheHint = 0, + const uint64_t offset = 0, const uint32_t length = 0) _THROW_(SparrowException) + : ByteBuffer(IOContext::getBuffer(sparrow_write_block_size), this), size_(0), start_(0), mark_(0), length_(0), cacheHint_(cacheHint), failed_(false) { + strcpy(filename_, name); + if (mode == FILE_MODE_CREATE) { + // When creating a new file, the file name is suffixed with ".tmp". + // The file is renamed when this file writer is destroyed. 
+ strcat(filename_, SUFFIX); + } + entry_ = FileCache::get().acquire(0, FileId(filename_, type, mode), 0, true, true); + if (!entry_->isValid()) { + FileCache::get().release(entry_, 0, true, true); + throw SparrowException::create(false, "Cannot open file %s for writing", filename_); + } + if (mode == FILE_MODE_UPDATE) { + start_ = ULLONG_MAX; + size_ = entry_->getValue().getSize(); + seek(std::min(size_, offset), length); + } + } + + virtual ~FileWriter(); + + void seek(const uint64_t offset, const uint64_t length) _THROW_(SparrowException); + + void write() _THROW_(SparrowException); + + void overflow() override _THROW_(SparrowException) { + write(); + const uint64_t offset = start_ + limit(); + if (offset != size_) { + seek(offset, limit() - mark_); + } else { + start_ += limit(); + mark_ = 0; + position(0); + } + } + + bool end() const override { + return false; // No EOF when writing. + } + + uint64_t getFileOffset() const { + return start_ + position(); + } + + uint64_t getFileSize() const { + return size_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileHeader : public FileHeaderBase { + friend ByteBuffer& operator << (ByteBuffer& buffer, const FileHeader& header); + friend ByteBuffer& operator >> (ByteBuffer& buffer, FileHeader& header); + +private: + + static const uint32_t sizes_[]; + + uint32_t format_; // File format; see FileHeader::currentFileFormat_. + uint64_t totalSize_; // Total file size, including padding and header. + uint32_t genTime_; // Generation timestamp (seconds since epoch). + FileSection recordsSection_; // Section containing all index records. + uint32_t recordSize_; // Size of a single record. + uint32_t records_; // Number of records. + FileSection binSection_; // Section containing binary (string) data. + FileSection treeSection_; // Section containing the tree (the tree enables fast navigation). + bool treeComplete_; // True if the tree contains all index values. + uint32_t nodeSize_; // Size of a single tree node. + uint32_t nodes_; // Number of nodes. + uint32_t index_; // DATA_FILE if this is a data file, or index id if this is an index file. + uint64_t start_; // Start timestamp (milliseconds since epoch). + uint64_t end_; // End timestamp (milliseconds since epoch). + + // To browse tree (not persisted). 
+ const TreeOrder* treeOrder_; + +public: + + static const uint8_t currentFileFormat_; + +private: + + static uint32_t computeSize(); + +public: + + FileHeader(); + FileHeader(const uint64_t binSize, const uint32_t treeSize, const bool treeComplete, const uint32_t nodeSize, const uint32_t recordSize, + const uint32_t records, const uint32_t index, const uint64_t start, const uint64_t end); + + void initialize(const uint32_t nodeSize = 0); + + uint64_t getStart() const override { + return start_; + } + + uint64_t getEnd() const override { + return end_; + } + + uint64_t getRecords() const override { + return records_; + } + + uint32_t getRecordSize() const override { + return recordSize_; + } + + bool isTreeComplete() const override { + return treeComplete_; + } + + uint32_t getNodes() const override { + return nodes_; + } + + uint32_t getMinNode() const override { + const uint32_t depth = TreeOrder::depth(nodes_); + return (1 << (depth - 1)) - 1; + } + + uint32_t getMaxNode() const override { + const uint32_t depth = TreeOrder::depth(nodes_); + if (nodes_ == static_cast((1 << depth) - 1)) { + return nodes_ - 1; + } else { + return (1 << (depth - 1)) - 2; + } + } + + uint32_t getPrevNode(const uint32_t node) const override { + return treeOrder_->getNodeIndex(treeOrder_->getListIndex(node, nodes_) - 1, nodes_); + } + + uint32_t getNextNode(const uint32_t node) const override { + return treeOrder_->getNodeIndex(treeOrder_->getListIndex(node, nodes_) + 1, nodes_); + } + + uint64_t seekTree(FileReader& reader, const uint64_t node) const override { + const uint64_t offset = node * nodeSize_; + assert(offset < treeSection_.getSize()); + return reader.seek(treeSection_.getOffset() + offset); + } + + uint64_t seekTreeData(FileReader& reader, const uint64_t node) const override { + const uint64_t offset = node * nodeSize_ + 8 /* TODO row size */; + assert(offset <= treeSection_.getSize()); + return reader.seek(treeSection_.getOffset() + offset); + } + + uint64_t seekRecord(FileReader& reader, const uint64_t record) const override { + const uint64_t offset = record * recordSize_; + assert(offset < recordsSection_.getSize()); + return reader.seek(recordsSection_.getOffset() + offset); + } + + uint64_t seekRecordData(FileReader& reader, const uint64_t record) const override { + const uint64_t offset = record * recordSize_ + (index_ == DATA_FILE ? 
0 : 4) /* TODO row size */; + assert(offset < recordsSection_.getSize()); + assert(index_ == DATA_FILE || treeComplete_); + return reader.seek(recordsSection_.getOffset() + offset); + } + + uint64_t seekBin(FileReader& reader, const uint64_t offset) const override { + assert(offset < binSection_.getSize()); + return reader.seek(binSection_.getOffset() + offset); + } + + const FileSection& getStringsSection() const override { + assert(0); + return *static_cast(0); + } + + uint64_t getTotalSize() const override { + return totalSize_; + } + + uint32_t getFormat() const { + return format_; + } + + const FileSection& getBinSection() const { + return binSection_; + } + + const FileSection& getRecordsSection() const { + return recordsSection_; + } + + const FileSection& getTreeSection() const { + return treeSection_; + } + + static uint32_t size(const uint32_t format = FileHeader::currentFileFormat_) { + return sizes_[format]; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const FileHeader& header) { + buffer << header.totalSize_ << header.genTime_ << header.recordsSection_ + << header.recordSize_ << header.records_ << header.binSection_ << header.treeSection_ + << header.treeComplete_ << header.nodeSize_ << header.nodes_ << header.index_ << header.start_ << header.end_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, FileHeader& header) { + const uint32_t version = buffer.getVersion(); // See FileHeader::currentFileFormat_. + header.format_ = version; + if (version == 1) { + uint64_t binSize; + uint32_t treeSize; + buffer >> header.totalSize_ >> header.genTime_ >> binSize + >> treeSize >> header.treeComplete_ >> header.recordSize_ >> header.records_ + >> header.index_ >> header.start_ >> header.end_; + const uint32_t headerSize = FileHeader::size(version); + header.recordsSection_ = FileSection(headerSize + treeSize + binSize, static_cast(header.records_) * header.recordSize_); + header.binSection_ = FileSection(headerSize + treeSize, binSize); + header.treeSection_ = FileSection(headerSize, treeSize); + } else if (version == 2) { + buffer >> header.totalSize_ >> header.genTime_ >> header.recordsSection_ + >> header.recordSize_ >> header.records_ >> header.binSection_ >> header.treeSection_ + >> header.treeComplete_ >> header.index_ >> header.start_ >> header.end_; + } else { + buffer >> header.totalSize_ >> header.genTime_ >> header.recordsSection_ + >> header.recordSize_ >> header.records_ >> header.binSection_ >> header.treeSection_ + >> header.treeComplete_ >> header.nodeSize_ >> header.nodes_ >> header.index_ >> header.start_ >> header.end_; + } + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DataFileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DataFileHeader : public FileHeaderBase { + friend ByteBuffer& operator << (ByteBuffer& buffer, const DataFileHeader& header); + friend ByteBuffer& operator >> (ByteBuffer& buffer, DataFileHeader& header); + +private: + + uint32_t genTime_; // Generation timestamp (seconds since epoch). + FileSection recordsSection_; // Section containing all data records. + uint32_t recordSize_; // Size of a single record. + uint64_t records_; // Number of records. + FileSection stringsSection_; // Section in strings file for fast loading. + uint64_t start_; // Start timestamp (milliseconds since epoch). + uint64_t end_; // End timestamp (milliseconds since epoch). 
+ + static const uint32_t size_; + +private: + + static uint32_t computeSize(); + +public: + + DataFileHeader(); + DataFileHeader(const uint32_t recordSize, const uint64_t records, const uint64_t stringOffset, const uint64_t stringSize, const uint64_t start, const uint64_t end); + + uint64_t getStart() const override { + return start_; + } + + uint64_t getEnd() const override { + return end_; + } + + uint64_t getRecords() const override { + return records_; + } + + uint32_t getRecordSize() const override { + return recordSize_; + } + + bool isTreeComplete() const override { + assert(0); + return false; + } + + uint32_t getNodes() const override { + assert(0); + return 0; + } + + uint32_t getMinNode() const override { + assert(0); + return 0; + } + + uint32_t getMaxNode() const override { + assert(0); + return 0; + } + + uint32_t getPrevNode(const uint32_t node) const override { + assert(0); + return 0; + } + + uint32_t getNextNode(const uint32_t node) const override { + assert(0); + return 0; + } + + uint64_t seekTree(FileReader& reader, const uint64_t node) const override { + assert(0); + return 0; + } + + uint64_t seekTreeData(FileReader& reader, const uint64_t node) const override { + assert(0); + return 0; + } + + uint64_t seekRecord(FileReader& reader, const uint64_t record) const override { + const uint64_t offset = record * recordSize_; + assert(offset < recordsSection_.getSize()); + return reader.seek(recordsSection_.getOffset() + offset); + } + + uint64_t seekRecordData(FileReader& reader, const uint64_t record) const override { + return seekRecord(reader, record); + } + + uint64_t seekBin(FileReader& reader, const uint64_t offset) const override { + assert(0); + return 0; + } + + const FileSection& getStringsSection() const override { + return stringsSection_; + } + + uint64_t getTotalSize() const override { + return FileUtil::adjustSizeToSectorSize(recordsSection_.getOffset() + recordsSection_.getSize()); + } + + const FileSection& getRecordsSection() const { + return recordsSection_; + } + + uint32_t getCacheLevel(const uint64_t offset) const { + // Level 2: contains data records from recent queries. + return 2; + } + + static uint32_t size() { + return size_; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const DataFileHeader& header) { + buffer << header.genTime_ << header.recordsSection_ << header.recordSize_ + << header.stringsSection_ << header.start_ << header.end_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, DataFileHeader& header) { + buffer >> header.genTime_ >> header.recordsSection_ >> header.recordSize_ + >> header.stringsSection_ >> header.start_ >> header.end_; + header.records_ = header.recordsSection_.getCount("records", header.recordSize_); + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IndexFileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IndexFileHeader : public FileHeaderBase { + friend ByteBuffer& operator << (ByteBuffer& buffer, const IndexFileHeader& header); + friend ByteBuffer& operator >> (ByteBuffer& buffer, IndexFileHeader& header); + +private: + + uint32_t genTime_; // Generation timestamp (seconds since epoch). + uint32_t index_; // Index id. + FileSection recordsSection_; // Section containing all index records. + uint32_t recordSize_; // Size of a single record. + uint64_t records_; // Number of records. 
+ FileSection treeSection_; // Section containing all tree records. + uint32_t nodeSize_; // Size of a single tree node. + uint64_t nodes_; // Number of nodes. + uint64_t start_; // Start timestamp (milliseconds since epoch). + uint64_t end_; // End timestamp (milliseconds since epoch). + + // To browse tree (not persisted). + const TreeOrder* treeOrder_; + + static const uint32_t size_; + +private: + + static uint32_t computeSize(); + +public: + + IndexFileHeader(); + IndexFileHeader(const uint32_t index, const uint32_t recordSize, const uint64_t records, + const uint32_t nodeSize, const uint64_t nodes, const uint64_t start, const uint64_t end); + + uint64_t getStart() const override { + return start_; + } + + uint64_t getEnd() const override { + return end_; + } + + uint64_t getRecords() const override { + return records_; + } + + uint32_t getRecordSize() const override { + return recordSize_; + } + + bool isTreeComplete() const override { + return true; + } + + uint32_t getNodes() const override { + return static_cast(nodes_); + } + + uint32_t getMinNode() const override { + const uint32_t depth = TreeOrder::depth(static_cast(nodes_)); + return (1 << (depth - 1)) - 1; + } + + uint32_t getMaxNode() const override { + const uint32_t depth = TreeOrder::depth(static_cast(nodes_)); + if (nodes_ == static_cast((1 << depth) - 1)) { + return static_cast(nodes_ - 1); + } else { + return (1 << (depth - 1)) - 2; + } + } + + uint32_t getPrevNode(const uint32_t node) const override { + return treeOrder_->getNodeIndex(treeOrder_->getListIndex(node, static_cast(nodes_)) - 1, static_cast(nodes_)); + } + + uint32_t getNextNode(const uint32_t node) const override { + return treeOrder_->getNodeIndex(treeOrder_->getListIndex(node, static_cast(nodes_)) + 1, static_cast(nodes_)); + } + + uint64_t seekTree(FileReader& reader, const uint64_t node) const override { + const uint64_t offset = node * nodeSize_; + assert(offset < treeSection_.getSize()); + return reader.seek(treeSection_.getOffset() + offset); + } + + uint64_t seekTreeData(FileReader& reader, const uint64_t node) const override { + const uint64_t offset = node * nodeSize_ + 8 /* TODO row size */; + assert(offset <= treeSection_.getSize()); + return reader.seek(treeSection_.getOffset() + offset); + } + + uint64_t seekRecord(FileReader& reader, const uint64_t record) const override { + const uint64_t offset = record * recordSize_; + assert(offset < recordsSection_.getSize()); + return reader.seek(recordsSection_.getOffset() + offset); + } + + uint64_t seekRecordData(FileReader& reader, const uint64_t record) const override { + assert(0); + return 0; + } + + uint64_t seekBin(FileReader& reader, const uint64_t offset) const override { + assert(0); + return 0; + } + + const FileSection& getStringsSection() const override { + assert(0); + return *static_cast(0); + } + + uint64_t getTotalSize() const override { + return FileUtil::adjustSizeToSectorSize(treeSection_.getOffset() + treeSection_.getSize()); + } + + static uint32_t size() { + return size_; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const IndexFileHeader& header) { + buffer << header.genTime_ << header.index_ << header.recordsSection_ << header.recordSize_ + << header.treeSection_ << header.nodeSize_ << header.start_ << header.end_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, IndexFileHeader& header) { + buffer >> header.genTime_ >> header.index_ >> header.recordsSection_ >> header.recordSize_ + >> header.treeSection_ >> header.nodeSize_ >> 
header.start_ >> header.end_; + header.nodes_ = header.treeSection_.getCount("nodes", header.nodeSize_); + header.treeOrder_ = &TreeOrder::get(static_cast(header.nodes_)); + header.records_ = header.recordsSection_.getCount("records", header.recordSize_); + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// StringFileHeader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class StringFileHeader : public FileHeaderBase { + friend ByteBuffer& operator >> (ByteBuffer& buffer, StringFileHeader& header); + +public: + + StringFileHeader() { + } + + uint64_t getStart() const override { + assert(0); + return 0; + } + + uint64_t getEnd() const override { + assert(0); + return 0; + } + + uint64_t getRecords() const override { + assert(0); + return 0; + } + + uint32_t getRecordSize() const override { + assert(0); + return 0; + } + + bool isTreeComplete() const override { + assert(0); + return false; + } + + uint32_t getNodes() const override { + assert(0); + return 0; + } + + uint32_t getMinNode() const override { + assert(0); + return 0; + } + + uint32_t getMaxNode() const override { + assert(0); + return 0; + } + + uint32_t getPrevNode(const uint32_t node) const override { + assert(0); + return 0; + } + + uint32_t getNextNode(const uint32_t node) const override { + assert(0); + return 0; + } + + uint64_t seekTree(FileReader& reader, const uint64_t node) const override { + assert(0); + return 0; + } + + uint64_t seekTreeData(FileReader& reader, const uint64_t node) const override { + assert(0); + return 0; + } + + uint64_t seekRecord(FileReader& reader, const uint64_t record) const override { + assert(0); + return 0; + } + + uint64_t seekRecordData(FileReader& reader, const uint64_t record) const override { + assert(0); + return 0; + } + + uint64_t seekBin(FileReader& reader, const uint64_t offset) const override { + reader.seek(offset); + return offset; + } + + const FileSection& getStringsSection() const override { + assert(0); + return *static_cast(0); + } + + uint64_t getTotalSize() const override { + assert(0); + return 0; + } +}; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, StringFileHeader& header) { + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartitionReader : public ReadCacheHint, public FileReader { +protected: + + const BlockCacheHint& hint_; + const uint32_t columnAlterSerial_; + FileHeaderBase* header_; + +private: + + PartitionReader(const PartitionReader&); + PartitionReader& operator = (const PartitionReader&); + +public: + + PartitionReader(const PersistentPartition& partition, const uint32_t fileId, const BlockCacheHint& hint) _THROW_(SparrowException); + + virtual ~PartitionReader() { + delete header_; + } + + uint32_t getFileId() const { + return fileOffset_.getFileId(); + } + + const BlockCacheHint& getBlockHint() const override { + return hint_; + } + + uint32_t getColumnAlterSerial() const { + return columnAlterSerial_; + } + + uint64_t seekTree(const uint64_t node) { + return header_->seekTree(*this, node); + } + + uint64_t seekTreeData(const uint64_t node) { + return header_->seekTreeData(*this, node); + } + + uint64_t seekRecord(const uint64_t record) { + return header_->seekRecord(*this, record); + } + + uint64_t 
seekRecordData(const uint64_t record) { + return header_->seekRecordData(*this, record); + } + + uint64_t seekBin(const uint64_t offset) { + return header_->seekBin(*this, offset); + } + + const FileHeaderBase& getHeader() const { + return *header_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionReaders +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSpVector PartitionReadersBase; + +// Keep readers for both data and index files. +class PartitionReaders : private PartitionReadersBase { +private: + + PartitionReaders& operator = (const PartitionReaders& right); + PartitionReaders(const PartitionReaders& right); + +public: + + void initialize(const uint32_t nbPartitions) { + clearAndDestroy(); + resize(nbPartitions * 8); + for (uint32_t i = 0; i < PartitionReadersBase::capacity(); ++i) { + PartitionReadersBase::append(0); + } + } + + PartitionReaders(const uint32_t nbPartitions = 0) { + initialize(nbPartitions); + } + + void clear() { + clearAndDestroy(); + } + + ~PartitionReaders() { + clearAndDestroy(); + } + + PartitionReader* get(const uint32_t n, PersistentPartition& partition, const uint32_t index, const bool isString, const BlockCacheHint& hint) _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _engine_fileutil_h_ */ diff --git a/storage/sparrow/engine/flush.cc b/storage/sparrow/engine/flush.cc new file mode 100644 index 000000000000..1457421678e2 --- /dev/null +++ b/storage/sparrow/engine/flush.cc @@ -0,0 +1,96 @@ +/* + Partition flush. +*/ + +#include "flush.h" +#include "internalapi.h" +#include "hash.h" +#include "fileutil.h" + +#include "../engine/log.h" + + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FlushAllTask +////////////////////////////////////////////////////////////////////////////////////////////////////// +Lock FlushAllTask::lock_(true, "FlushAllTask::lock_"); +FlushAllTask* FlushAllTask::task_ = 0; + +void FlushAllTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + InternalApi::flushAll(false); + { + Guard guard(lock_); + task_ = 0; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FlushJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void FlushJob::process() { + // Check that no one else is already flushing into that main partition + mainPartition_->getMaster().startFlush(mainPartition_->getSerial()); + + const uint32_t nbWorkers = workerJobs_.length(); + const uint32_t nbWriters = writerJobs_.length(); + partition_->setJobCounter(nbWorkers + nbWriters); + + // Send Worker jobs to Worker thread pool, and Writer jobs to Writer thread pool. 
+ for (uint32_t i = 0; i < nbWorkers; ++i) { + Worker::sendJob(workerJobs_[i]); + } + for (uint32_t i = 0; i < nbWriters; ++i) { + Writer::sendJob(writerJobs_[i]); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// StringJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void StringJob::process() { + try { + partition_->flushStrings(mainPartition_); + } catch(const SparrowException& e) { + partition_->error(); + spw_print_error("Failed to flush strings from partition %s.%s.%llu (try %u): %s", + partition_->getMaster()->getDatabase().c_str(), partition_->getMaster()->getTable().c_str(), static_cast(partition_->getSerial()), partition_->getNbFlushTries(), e.getText()); + partition_->endFlush(mainPartition_); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IndexJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void IndexJob::process() { + partition_->compute(mainPartition_, id_); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// WriteJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void WriteJob::process() { + if (id_ == DATA_FILE) { + partition_->getMaster()->blockUpdate(); + } + try { + partition_->write(mainPartition_, id_, indirector_); + } catch(const SparrowException& e) { + partition_->error(); + if (id_ == DATA_FILE) { + spw_print_error("Failed to flush data from partition %s.%s.%llu (try %u): %s", + partition_->getMaster()->getDatabase().c_str(), partition_->getMaster()->getTable().c_str(), static_cast(partition_->getSerial()), partition_->getNbFlushTries(), e.getText()); + } else { + spw_print_error("Failed to flush index %u from partition %s.%s.%llu (try %u): %s", + id_, partition_->getMaster()->getDatabase().c_str(), partition_->getMaster()->getTable().c_str(), static_cast(partition_->getSerial()), partition_->getNbFlushTries(), e.getText()); + } + } + partition_->endFlush(mainPartition_); +} + +} diff --git a/storage/sparrow/engine/flush.h b/storage/sparrow/engine/flush.h new file mode 100644 index 000000000000..828b4ad27e3e --- /dev/null +++ b/storage/sparrow/engine/flush.h @@ -0,0 +1,212 @@ +/* + Partition flush. 
+*/ + +#ifndef _engine_flush_h_ +#define _engine_flush_h_ + +#include "transient.h" +#include "persistent.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FlushTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FlushTask : public TransientTask { +private: + + void process(TransientPartition& partition, const uint64_t timestamp) override _THROW_(SparrowException) { + partition.flush(timestamp); + } + +public: + + FlushTask(Master* master, const uint64_t serial) : TransientTask(master, serial, Flush::getQueue()) { + Atomic::inc32(&SparrowStatus::get().tasksPendingFlushTasks_); + } + + ~FlushTask() { + Atomic::dec32(&SparrowStatus::get().tasksPendingFlushTasks_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FlushAllTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FlushAllTask : public Task { +private: + static Lock lock_; + static FlushAllTask* task_; + +public: + + FlushAllTask() : Task(Flush::getQueue()) { + Atomic::inc32(&SparrowStatus::get().tasksPendingFlushAllTasks_); + } + + ~FlushAllTask() { + Atomic::dec32(&SparrowStatus::get().tasksPendingFlushAllTasks_); + } + + virtual bool operator == (const FlushAllTask& right) const { + return true; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); + + static bool flushAll(const uint64_t timestamp = 0) { + Guard guard(lock_); + if (task_ != 0) { + return false; + } + task_ = new FlushAllTask(); + Scheduler::addTask(task_, timestamp, true); + return task_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TransientJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TransientJob : public Job, public MasterDependency { +protected: + + TransientPartitionGuard partition_; + PersistentPartitionGuard mainPartition_; + +public: + + TransientJob(TransientPartition* partition, PersistentPartitionGuard mainPartition) + : MasterDependency(partition->getMaster()), partition_(partition), mainPartition_(mainPartition) { + } + + virtual ~TransientJob() { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FlushJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FlushJob : public TransientJob { +private: + + Jobs workerJobs_; + Jobs writerJobs_; + +public: + + FlushJob(TransientPartition* partition, PersistentPartitionGuard mainPartition, const uint32_t workerJobs, const uint32_t writerJobs) + : TransientJob(partition, mainPartition), workerJobs_(workerJobs), writerJobs_(writerJobs) { + Atomic::inc32(&SparrowStatus::get().tasksPendingFlushJobs_); + } + + virtual ~FlushJob() { + Atomic::dec32(&SparrowStatus::get().tasksPendingFlushJobs_); + } + + void process() override; + + void stop() override { + } + + Jobs& getWorkerJobs() { + return workerJobs_; + } + + Jobs& getWriterJobs() { + return writerJobs_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// StringJob 
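Editor's note: FlushAllTask::flushAll() above allows at most one pending "flush all" request: a static pointer guarded by a static lock holds the queued task, and run() clears it. A standalone sketch of that single-slot pattern (FlushAllRequest, schedule and runPending are illustrative; real ownership and scheduling are simplified):

#include <cstdio>
#include <mutex>

// A single static slot, protected by a lock, holds the pending request.
// Scheduling is refused while one is already queued; running clears the slot.
class FlushAllRequest {
  static std::mutex lock_;
  static FlushAllRequest* pending_;

 public:
  static bool schedule() {
    std::lock_guard<std::mutex> guard(lock_);
    if (pending_ != nullptr) return false;  // one is already queued
    pending_ = new FlushAllRequest();       // a real scheduler would own this
    return true;
  }
  static void runPending() {
    std::lock_guard<std::mutex> guard(lock_);
    std::printf("flushing all tables\n");
    delete pending_;
    pending_ = nullptr;
  }
};

std::mutex FlushAllRequest::lock_;
FlushAllRequest* FlushAllRequest::pending_ = nullptr;

int main() {
  std::printf("first schedule: %d\n", FlushAllRequest::schedule());   // 1
  std::printf("second schedule: %d\n", FlushAllRequest::schedule());  // 0, still pending
  FlushAllRequest::runPending();
  std::printf("after run: %d\n", FlushAllRequest::schedule());        // 1 again
}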
+////////////////////////////////////////////////////////////////////////////////////////////////////// + +class StringJob : public TransientJob { +public: + + StringJob(TransientPartition* partition, PersistentPartitionGuard mainPartition) + : TransientJob(partition, mainPartition) { + Atomic::inc32(&SparrowStatus::get().tasksPendingStringJobs_); + } + + virtual ~StringJob() { + Atomic::dec32(&SparrowStatus::get().tasksPendingStringJobs_); + } + + void process() override; + + void stop() override { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IndexJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IndexJob : public TransientJob { +private: + + const uint32_t id_; + +public: + + IndexJob(TransientPartition* partition, PersistentPartitionGuard mainPartition, const uint32_t id) + : TransientJob(partition, mainPartition), id_(id) { + Atomic::inc32(&SparrowStatus::get().tasksPendingIndexJobs_); + } + + virtual ~IndexJob() { + Atomic::dec32(&SparrowStatus::get().tasksPendingIndexJobs_); + } + + void process() override; + + void stop() override { + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// WriteJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class WriteJob : public TransientJob { +private: + + const uint32_t id_; + Indirector* indirector_; + +public: + + WriteJob(TransientPartition* partition, PersistentPartitionGuard mainPartition, const uint32_t id, Indirector* indirector) + : TransientJob(partition, mainPartition), id_(id), indirector_(indirector) { + Atomic::inc32(&SparrowStatus::get().tasksPendingWriteJobs_); + } + + virtual ~WriteJob() { + Atomic::dec32(&SparrowStatus::get().tasksPendingWriteJobs_); + delete indirector_; + } + + void process() override; + + void stop() override { + } +}; + +} + +#endif /* #ifndef _engine_flush_h_ */ diff --git a/storage/sparrow/engine/hash.h b/storage/sparrow/engine/hash.h new file mode 100644 index 000000000000..46d04a1ee4c7 --- /dev/null +++ b/storage/sparrow/engine/hash.h @@ -0,0 +1,768 @@ +/* + Hash table types. 
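Editor's note: the job classes above (FlushJob, StringJob, IndexJob, WriteJob) all maintain "pending" status gauges by incrementing an atomic counter in the constructor and decrementing it in the destructor. A standalone sketch of that bookkeeping with std::atomic (WriteJobSketch and pendingWriteJobs are illustrative names, not SparrowStatus):

#include <atomic>
#include <cstdint>
#include <cstdio>

// Global gauge, incremented when a job is created and decremented when it
// is destroyed, so the current number of pending jobs can be reported.
static std::atomic<uint32_t> pendingWriteJobs{0};

class WriteJobSketch {
 public:
  WriteJobSketch()  { pendingWriteJobs.fetch_add(1); }
  ~WriteJobSketch() { pendingWriteJobs.fetch_sub(1); }
};

int main() {
  {
    WriteJobSketch a, b;
    std::printf("pending: %u\n", (unsigned)pendingWriteJobs.load());  // 2
  }
  std::printf("pending: %u\n", (unsigned)pendingWriteJobs.load());    // 0
}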
+ */ + +#ifndef _engine_hash_h_ +#define _engine_hash_h_ + +#include "vec.h" + +namespace Sparrow { + +// spread hash code +static inline uint32_t spreadHashCode(uint32_t h) { + h += ~(h << 9); + h ^= (h >> 14); + h += (h << 4); + h ^= (h >> 10); + return h; +} + +template class SYShlink { +public: + + SYShlink(const T& object, const uint32_t hash, SYShlink* next); + SYShlink* getNext() const; + void setNext(SYShlink* next); + const T& getObject() const; + T& getObject(); + void setObject(const T& object); + void setHash(const uint32_t hash); + uint32_t getHash() const; + +protected: + + uint32_t hash_; + SYShlink* next_; + T object_; +}; + +template inline SYShlink::SYShlink(const T& object, const uint32_t hash, SYShlink* next) : + hash_(hash), next_(next), object_(object) { +} + +template inline SYShlink* SYShlink::getNext() const { + return next_; +} + +template inline void SYShlink::setNext(SYShlink* next) { + next_ = next; +} + +template inline const T& SYShlink::getObject() const { + return object_; +} + +template inline T& SYShlink::getObject() { + return object_; +} + +template inline void SYShlink::setObject(const T& object) { + object_ = object; +} + +template inline void SYShlink::setHash(const uint32_t hash) { + hash_ = hash; +} + +template inline uint32_t SYShlink::getHash() const { + return hash_; +} + +// +// Default allocator for hash tables. +// +template class SYShAllocator { +private: + + uint32_t n_; + +public: + + SYShAllocator() : n_(0) { + } + + SYShlink* acquire(const T& object, const uint32_t hash, SYShlink* next) { + n_++; + return new SYShlink(object, hash, next); + } + + void release(SYShlink* link) { + n_--; + delete link; + } +}; + +// +// Pool allocator for hash tables. +// +template class SYShPoolAllocator { +private: + + SYShlink* root_; + +public: + + SYShPoolAllocator() : + root_(0) { + } + ~SYShPoolAllocator() { + SYShlink* link = root_; + while (link != 0) { + SYShlink* next = link->getNext(); + delete link; + link = next; + } + } + SYShlink* acquire(const T& object, const uint32_t hash, SYShlink* next) { + if (root_ == 0) { + return new SYShlink(object, hash, next); + } else { + SYShlink* link = root_; + root_ = root_->getNext(); + link->setObject(object); + link->setHash(hash); + link->setNext(next); + return link; + } + } + void release(SYShlink* link) { + link->setNext(root_); + root_ = link; + } +}; + +template class SYShashBase { +protected: + + const uint32_t initial_; + SYSpVector , 0> vector_; + uint32_t items_; + +protected: + + void initialize(SYSpVector , 0>& vector); + uint32_t initialize(); + void extend(); + +public: + + SYShashBase(const uint32_t initial); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + int64_t getSize() const; +}; + +template inline void SYShashBase::initialize(SYSpVector , 0>& vector) { + const uint32_t length = vector.length(); + for (uint32_t i = 0; i < length; ++i) { + vector[i] = 0; + } +} + +template inline uint32_t SYShashBase::initialize() { + if (vector_.isEmpty()) { + vector_.reshape(initial_); + initialize(vector_); + } + return vector_.length(); +} + +template inline void SYShashBase::extend() { + const uint32_t length = vector_.length(); + if (length != 0) { + SYSpVector , 0> vector; + const uint32_t buckets = length * 2; + vector.reshape(buckets); + initialize(vector); + for (uint32_t i = 0; i < length; ++i) { + SYShlink* sl = vector_[i]; + SYShlink* nsl = 0; + while (sl != 0) { + nsl = sl->getNext(); + const uint32_t h = sl->getHash(); + const uint32_t bucket = (spreadHashCode(h) % 
buckets); + sl->setNext(vector[bucket]); + vector[bucket] = sl; + sl = nsl; + } + } + vector_ = vector; + } +} + +template inline uint32_t SYShashBase::entries() const { + return items_; +} + +template inline bool SYShashBase::isEmpty() const { + return (entries() == 0); +} + +template inline int64_t SYShashBase::getSize() const { + return entries() * sizeof(SYShlink); +} + +template inline SYShashBase::SYShashBase(const uint32_t initial) : initial_(initial == 0 ? 1 : initial), items_(0) { +} + +template class SYShashIterator; + +template > class SYShash: public SYShashBase, public A { + + friend class SYShashIterator ; + +public: + + // constructors + SYShash(const uint32_t initial); + SYShash(const SYShash& right); + + // destructor + ~SYShash(); + + // operations + void insert(const T& t); + T* insertAndReturn(const T& t); + bool remove(const T& t); + bool contains(const T& t) const; + void clear(); + bool find(const T& t, T& r) const; + T* find(const T& t) const; + + // copy + SYShash& operator =(const SYShash& right); + + // equality + bool operator ==(const SYShash& right) const; +}; + +template inline SYShash::SYShash(const uint32_t initial) + : SYShashBase(initial) { +} + +template inline void SYShash::clear() { + const uint32_t length = this->vector_.length(); + for (uint32_t bucket = 0; bucket < length; ++bucket) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + SYShlink* next = sl->getNext(); + this->release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline SYShash::~SYShash() { + clear(); +} + +template inline T* SYShash::insertAndReturn(const T& t) { + const uint32_t buckets = this->initialize(); + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = this->acquire(t, h, this->vector_[bucket]); + this->vector_[bucket] = sl; + this->items_++; + if (this->items_ == this->vector_.length()) { + this->extend(); + } + return static_cast(&sl->getObject()); +} + +template inline void SYShash::insert(const T& t) { + insertAndReturn(t); +} + +template inline bool SYShash::contains(const T& t) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline bool SYShash::remove(const T& t) { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = this->vector_[bucket]; + SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + if (psl == 0) { + this->vector_[bucket] = sl->getNext(); + } else { + psl->setNext(sl->getNext()); + } + this->release(sl); + this->items_--; + return true; + } + psl = sl; + sl = sl->getNext(); + } + return false; +} + +template inline bool SYShash::find(const T& t, T& r) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + const SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + r = sl->getObject(); + return true; + } + psl = sl; + sl = 
sl->getNext(); + } + return false; +} + +template inline T* SYShash::find(const T& t) const { + const uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + const uint32_t h = t.hash(); + const uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && sl->getObject() == t) { + return const_cast(&sl->getObject()); + } + sl = sl->getNext(); + } + return 0; +} + +template inline bool SYShash::operator ==(const SYShash& right) const { + if (this->entries() != right.entries()) { + return false; + } + const uint32_t length = this->vector_.length(); + for (uint32_t bucket = 0; bucket < length; ++bucket) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (!right.contains(sl->getObject())) { + return false; + } + sl = sl->getNext(); + } + } + return true; +} + +template > class SYShashIterator { +public: + + // constructors + SYShashIterator(SYShash& hash); + SYShashIterator(const SYShash& hash); + + // operators + bool operator ++(); + bool operator ()(); + + // operations + void reset(); + const T& key() const; + T& key(); + +private: + + // copy, assignment and equality are forbidden + SYShashIterator(const SYShashIterator& right); + SYShashIterator& operator =(const SYShashIterator& right); + bool operator ==(const SYShashIterator& right) const; + +protected: + + SYShash& hash_; + uint32_t bucket_; + SYShlink* sl_; +}; + +template inline void SYShashIterator::reset() { + bucket_ = SYS_NPOS; + sl_ = 0; +} + +template inline SYShashIterator::SYShashIterator(SYShash& hash) : hash_(hash) { + reset(); +} + +template inline SYShashIterator::SYShashIterator(const SYShash& hash) : hash_(const_cast& >(hash)) { + reset(); +} + +template inline bool SYShashIterator::operator ++() { + if (sl_ != 0) { + sl_ = sl_->getNext(); + } + while (sl_ == 0) { + bucket_++; // wrapping + if (bucket_ >= hash_.vector_.length()) { + return false; + } + sl_ = hash_.vector_[bucket_]; + } + return true; +} + +template inline bool SYShashIterator::operator ()() { + return ++(*this); +} + +template inline const T& SYShashIterator::key() const { + return sl_->getObject(); +} + +template inline T& SYShashIterator::key() { + return sl_->getObject(); +} + +// copy operator/constructor for SYShash: need iterator +template inline SYShash& SYShash::operator =( + const SYShash& right) { + if (this == &right) { + return *this; + } + clear(); + SYShashIterator iterator(right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template inline SYShash::SYShash(const SYShash& right) + : SYShashBase(right.initial_) { + *this = right; +} + +template class SYSpHashIterator; + +template > class SYSpHash: public SYShashBase, public A { + + friend class SYSpHashIterator ; + +public: + + // constructors + SYSpHash(const uint32_t initial); + SYSpHash(const SYSpHash& right); + + // destructor + ~SYSpHash(); + + // operations + void insert(T* t); + T* remove(const T* t); + bool contains(const T* t) const; + void clear(); + void clearAndDestroy(); + T* find(const T* t) const; + + // copy + SYSpHash& operator = (const SYSpHash& right); + + // equality + bool operator ==(const SYSpHash& right) const; +}; + +template inline SYSpHash::SYSpHash(const uint32_t initial) : SYShashBase(initial) { +} + +template inline void SYSpHash::clear() { + uint32_t buckets = this->vector_.length(); + uint32_t bucket; + for (bucket = 0; bucket < buckets; ++bucket) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 
0) { + SYShlink* next = sl->getNext(); + this->release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline void SYSpHash::clearAndDestroy() { + uint32_t buckets = this->vector_.length(); + uint32_t bucket; + for (bucket = 0; bucket < buckets; ++bucket) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + SYShlink* next = sl->getNext(); + delete sl->getObject(); + this->release(sl); + sl = next; + } + } + this->vector_.clear(); + this->items_ = 0; +} + +template inline SYSpHash::~SYSpHash() { + clear(); +} + +template inline void SYSpHash::insert(T* t) { + uint32_t buckets = this->initialize(); + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + this->vector_[bucket] = this->acquire(t, h, this->vector_[bucket]); + this->items_++; + if (this->items_ == this->vector_.length()) { + this->extend(); + } +} + +template inline bool SYSpHash::contains(const T* t) const { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return false; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline T* SYSpHash::remove(const T* t) { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + SYShlink* sl = this->vector_[bucket]; + SYShlink* psl = 0; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + if (psl == 0) { + this->vector_[bucket] = sl->getNext(); + } else { + psl->setNext(sl->getNext()); + } + T* result = sl->getObject(); + this->release(sl); + this->items_--; + return result; + } + psl = sl; + sl = sl->getNext(); + } + return 0; +} + +template inline T* SYSpHash::find(const T* t) const { + uint32_t buckets = this->vector_.length(); + if (buckets == 0) { + return 0; + } + uint32_t h = t->hash(); + uint32_t bucket = (spreadHashCode(h) % buckets); + const SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (sl->getHash() == h && *(sl->getObject()) == *t) { + return sl->getObject(); + } + sl = sl->getNext(); + } + return 0; +} + +template inline bool SYSpHash::operator ==(const SYSpHash& right) const { + if (this->entries() != right.entries()) { + return false; + } + const uint32_t length = this->vector_.length(); + for (uint32_t bucket = 0; bucket < length; ++bucket) { + SYShlink* sl = this->vector_[bucket]; + while (sl != 0) { + if (!right.contains(*sl->getObject())) { + return false; + } + sl = sl->getNext(); + } + } + return true; +} + +template > class SYSpHashIterator { +public: + + // constructor + SYSpHashIterator(SYSpHash& hash); + SYSpHashIterator(const SYSpHash& hash); + + // operators + T* operator ++(); + T* operator ()(); + + // operations + void reset(); + const T* key() const; + T* key(); + +private: + + // copy, assignment and equality are forbidden + SYSpHashIterator(const SYSpHashIterator& right); + SYSpHashIterator& operator =(const SYSpHashIterator& right); + bool operator ==(const SYSpHashIterator& right) const; + +protected: + + SYSpHash& hash_; + uint32_t bucket_; + SYShlink* sl_; +}; + +template inline void SYSpHashIterator::reset() { + bucket_ = SYS_NPOS; + sl_ = 0; +} + +template inline SYSpHashIterator::SYSpHashIterator(SYSpHash& hash) : hash_(hash) { + reset(); +} + +template inline 
SYSpHashIterator::SYSpHashIterator(const SYSpHash& hash) : hash_((SYSpHash&)hash) { + reset(); +} + +template inline T* SYSpHashIterator::operator ++() { + if (sl_ != 0) { + sl_ = sl_->getNext(); + } + while (sl_ == 0) { + bucket_++; // wrapping + if (bucket_ >= hash_.vector_.length()) { + return 0; + } + sl_ = hash_.vector_[bucket_]; + } + return sl_->getObject(); +} + +template inline T* SYSpHashIterator::operator ()() { + return ++(*this); +} + +template inline const T* SYSpHashIterator::key() const { + return sl_->getObject(); +} + +template inline T* SYSpHashIterator::key() { + return sl_->getObject(); +} + +// copy operator/constructor for SYShash: need iterator +template inline SYSpHash& SYSpHash::operator =(const SYSpHash& right) { + if (this == &right) { + return *this; + } + clear(); + SYSpHashIterator iterator(right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template inline SYSpHash::SYSpHash(const SYSpHash& right) : SYShashBase(right.initial_) { + *this = right; +} + +// Key/value entry to build a map. + +template class Entry { +private: + + K key_; + V value_; + +public: + + Entry(); + Entry(const K& key); + Entry(const K& key, const V& value); + ~Entry(); + bool operator ==(const Entry& right) const; + Entry& operator =(const Entry& right); + uint32_t hash() const; + const K& getKey() const; + const V& getValue() const; +}; + +template inline Entry::Entry() : key_(), value_() { +} + +template inline Entry::Entry(const K& key) : key_(key), value_() { +} + +template inline Entry::Entry(const K& key, const V& value) : key_(key), value_(value) { +} + +template inline Entry::~Entry() { +} + +template inline bool Entry::operator ==(const Entry& right) const { + return key_ == right.key_; +} + +template inline Entry& Entry::operator =(const Entry& right) { + if (this != &right) { + key_ = right.key_; + value_ = right.value_; + } + return *this; +} + +template inline uint32_t Entry::hash() const { + return key_.hash(); +} + +template inline const K& Entry::getKey() const { + return key_; +} + +template inline const V& Entry::getValue() const { + return value_; +} + +} + +#endif /* #ifndef _engine_hash_h_ */ diff --git a/storage/sparrow/engine/internalapi.cc b/storage/sparrow/engine/internalapi.cc new file mode 100644 index 000000000000..46b562109429 --- /dev/null +++ b/storage/sparrow/engine/internalapi.cc @@ -0,0 +1,1091 @@ +/* + Internal API. +*/ + +#define MYSQL_SERVER 1 +#include "../handler/plugin.h" // For configuration parameters. +#include "internalapi.h" +#include "master.h" +#include "fileutil.h" +#include "transient.h" +#include "coalescing.h" +#include "purge.h" + +#include "mysql.h" +#include "sql/mysqld.h" +#include "sql/current_thd.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// InternalApi +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SYSpHash InternalApi::hash_(64); +Lock InternalApi::hashLock_(true, "InternalApi::hashLock_"); +volatile uint32_t InternalApi::flushOnGoing_ = 0; + +// Setup: lists all master files by scanning data directory for spm files. 
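Editor's note: the SYShash/SYSpHash templates above implement open hashing with separate chaining: spreadHashCode() scrambles the raw hash, the bucket index is the scrambled value modulo the bucket count, and the bucket array doubles as soon as the number of items reaches the number of buckets (extend()). A minimal standalone sketch of that scheme with plain std:: types (IntSet, Node and spread are illustrative names, not the Sparrow containers):

#include <cstdint>
#include <cstdio>
#include <vector>

// Same scrambling steps as spreadHashCode() above.
static uint32_t spread(uint32_t h) {
  h += ~(h << 9); h ^= (h >> 14); h += (h << 4); h ^= (h >> 10);
  return h;
}

struct Node { uint32_t key; Node* next; };

class IntSet {
  std::vector<Node*> buckets_;
  uint32_t items_ = 0;

  // Rebuild the chains into a larger bucket array.
  void rehash(uint32_t newSize) {
    std::vector<Node*> fresh(newSize, nullptr);
    for (Node* n : buckets_) {
      while (n != nullptr) {
        Node* next = n->next;
        Node*& slot = fresh[spread(n->key) % newSize];
        n->next = slot;
        slot = n;
        n = next;
      }
    }
    buckets_.swap(fresh);
  }

 public:
  explicit IntSet(uint32_t initial = 8) : buckets_(initial, nullptr) {}
  ~IntSet() {
    for (Node* n : buckets_) while (n != nullptr) { Node* next = n->next; delete n; n = next; }
  }
  void insert(uint32_t key) {
    Node*& slot = buckets_[spread(key) % buckets_.size()];
    slot = new Node{key, slot};                               // push at chain head
    if (++items_ == buckets_.size()) rehash(2 * static_cast<uint32_t>(buckets_.size()));
  }
  bool contains(uint32_t key) const {
    for (const Node* n = buckets_[spread(key) % buckets_.size()]; n != nullptr; n = n->next)
      if (n->key == key) return true;
    return false;
  }
};

int main() {
  IntSet set;
  for (uint32_t i = 0; i < 100; ++i) set.insert(i * 7);
  std::printf("contains 21: %d, contains 22: %d\n", set.contains(21), set.contains(22));
}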
+// STATIC +void InternalApi::setup() _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::setup"); + Files files; + FileUtil::scanDirectory(mysql_real_data_home, ".spm", 2, files, true); + SYSslistIterator iterator(files); + while (++iterator) { + const Str& file = iterator.key(); + const Str database = FileUtil::getDatabaseName(file.c_str()); + const Str table = FileUtil::getTableName(file.c_str()); + InternalApi::get(database.c_str(), table.c_str(), true, false, 0); + } + + // Make room if necessary. + Purge::wakeUp(); + Scheduler::addTask(new PurgeTask()); +} + +typedef Entry NamedColumn; +typedef SYSvector NamedColumns; +typedef Pair AddedColumn; +typedef SYSvector AddedColumns; + +// Initializes table. +// STATIC +void InternalApi::init(const char* username, const char* password, const char* database, const char* table, + const ColumnExs& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, const DnsConfiguration& dnsConfiguration, + const uint32_t aggregationPeriod, const uint64_t defaultWhere, const uint64_t stringOptimization, + const uint64_t maxLifetime, const uint64_t coalescingPeriod) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::init"); + char tmp[16 * 1024]; + bool create = false; + Str s; + { + MasterGuard master; + { + Guard guard(hashLock_); + const Master key(database, table, true); + master = hash_.find(&key); + } + if (master == 0) { + create = true; + } else { + // The table already exists. Check if the definition has changed. Get the table definition from MySQL. + Str ldatabase(database); + ldatabase.toLower(); + Str ltable(table); + ltable.toLower(); + MySQLGuard mysql(username, password); + snprintf(tmp, sizeof(tmp), "select column_name as name," + " data_type as type, character_maximum_length as length, datetime_precision" + " from information_schema.columns" + " where table_schema='%s' and table_name='%s'" + " order by ordinal_position", ldatabase.c_str(), ltable.c_str()); + mysql.execute(tmp); + + ReadGuard guard(master->getLock()); + const Columns& masterColumns = master->getColumns(); + const Indexes& masterIndexes = master->getIndexes(); +#ifndef NDEBUG + DBUG_PRINT("sparrow_api", ("Table %s.%s already exists", database, table)); + DBUG_PRINT("sparrow_api", ("Current columns:")); + for (uint32_t i = 0; i < masterColumns.length(); ++i) { + const Column& column = masterColumns[i]; + if (column.isDropped()) { + continue; + } + DBUG_PRINT("sparrow_api", ("Column %u: %s - %s", i, column.getName().c_str(), ColumnEx::getSqlType(column.getType()))); + } + DBUG_PRINT("sparrow_api", ("Current indexes:")); + for (uint32_t i = 0; i < masterIndexes.length(); ++i) { + const Index& index = masterIndexes[i]; + if (index.isDropped()) { + continue; + } + Str scolumns; + const ColumnIds& ids = index.getColumnIds(); + for (uint32_t j = 0; j < ids.length(); ++j) { + if (j > 0) { + scolumns += Str(", "); + } + scolumns += masterColumns[ids[j]].getName(); + } + DBUG_PRINT("sparrow_api", ("Index %u: %s - %s", i, index.getName().c_str(), scolumns.c_str())); + } + DBUG_PRINT("sparrow_api", ("New columns:")); + for (uint32_t i = 0; i < columns.length(); ++i) { + const Column& column = columns[i]; + DBUG_PRINT("sparrow_api", ("Column %u: %s - %s", i, column.getName().c_str(), ColumnEx::getSqlType(column.getType()))); + } + DBUG_PRINT("sparrow_api", ("New indexes:")); + for (uint32_t i = 0; i < indexes.length(); ++i) { + const Index& index = indexes[i]; + Str scolumns; + const ColumnIds& ids = index.getColumnIds(); + for (uint32_t j = 0; j < ids.length(); 
++j) { + if (j > 0) { + scolumns += Str(", "); + } + scolumns += columns[ids[j]].getName(); + } + DBUG_PRINT("sparrow_api", ("Index %u: %s - %s", i, index.getName().c_str(), scolumns.c_str())); + } +#endif + NamedColumns newColumnsByName(columns.length()); + for (uint32_t i = 0; i < columns.length(); ++i) { + const ColumnEx& column = columns[i]; + newColumnsByName.append(NamedColumn(column.getName(), column)); + } + Names existingColumns; + Names droppedColumns; + Names indexesToDrop; + + // Process result from query executed above + MYSQL_RES* result = mysql.get(); + MYSQL_ROW row; + bool first = true; + while ((row = mysql_fetch_row(result)) != 0) { + int i = 0; + const Str name(row[i++]); + const Str stype(row[i++]); + const char* l = row[i++]; + const uint32_t length = l == 0 ? 0 : static_cast(atoi(l)); + const char* m = row[i++]; + const uint64_t datetime_precision = m == 0 ? 0 : static_cast(atol(m)); + existingColumns.append(name); + const uint32_t index = newColumnsByName.index(NamedColumn(name)); + if (index == SYS_NPOS) { + if (!droppedColumns.contains(name)) { + droppedColumns.append(name); + } + } else { + const ColumnEx& column = newColumnsByName[index].getValue(); + const ColumnType type = column.getType(); + + // Take advantage of having the column definition from MySQL to check information consistency with column definition from Sparrow + if (type == COL_TIMESTAMP) { + const uint32_t mast_col_indx = master->getColumn(name); + if (mast_col_indx != SYS_NPOS) { // Actually, it should never be SYS_NPOS. Otherwise, it would mean there's a big pb. + Column& column = master->getColumns()[mast_col_indx]; + if (column.getInfo() != datetime_precision) { + spw_print_information("Decimal precision of timestamp column for table %s.%s has been adjusted to 3.", database, table); + column.setInfo(datetime_precision); + } + } + } + + bool alter_col = false; + if ((type == COL_BLOB || type == COL_STRING) + && stype.compareTo(Str(ColumnEx::getSqlType(type)), false) == 0 + && length != column.getStringSize()) { + alter_col = true; + } if (type == COL_TIMESTAMP && column.getInfo() != UINT_MAX) { + // Timestamp precision can be passed either through the column's info parameter or through its string size parameter. So check both. + uint32_t decimals = column.getInfo() != 0 ? column.getInfo() : column.getStringSize(); + if (datetime_precision != decimals) { + if ( decimals > 6 ) { + spw_print_warning("Timestamp precision is out of range (%u, %u) in declaration of column %s for table %s.%s. " + "Timestamp decimal precision must be between 0 and 6.", column.getInfo(), column.getStringSize(), name.c_str(), database, table); + } else { + spw_print_information("Timestamp precision has changed (%u, %u) vs %u in declaration of column %s for table %s.%s.", + column.getInfo(), column.getStringSize(), (uint)datetime_precision, name.c_str(), database, table); + alter_col = true; + } + //} else { + // spw_print_warning("Timestamp precision is the same (%u, %u) vs %u in declaration of column %s for table %s.%s.", + // column.getInfo(), column.getStringSize(), (uint)datetime_precision, name.c_str(), database, table); + } + } + if ( alter_col ) { + if (first) { + snprintf(tmp, sizeof(tmp), "alter table `%s`.`%s` ", database, table); + s += Str(tmp); + } else { + s += Str(", "); + } + first = false; + const Str definition(column.getDefinition()); + snprintf(tmp, sizeof(tmp), "modify column %s", definition.c_str()); + s += Str(tmp); + } + } + } + + // Detect indexes referencing dropped columns. 
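Editor's note: the loop that follows drops indexes that reference removed columns; before that, init() has classified every column as kept, dropped, or added (remembering which existing column an added one should be placed after). A standalone sketch of that classification with plain std::vector/std::string (the column names are illustrative):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Names present before but not after are "dropped"; names present after but
// not before are "added", together with the column they should follow.
int main() {
  const std::vector<std::string> existing = {"ts", "src", "dst", "bytes"};
  const std::vector<std::string> wanted   = {"ts", "src", "dst", "pkts"};

  auto contains = [](const std::vector<std::string>& v, const std::string& s) {
    return std::find(v.begin(), v.end(), s) != v.end();
  };

  for (const auto& name : existing)
    if (!contains(wanted, name)) std::printf("drop column %s\n", name.c_str());

  std::string after;
  for (const auto& name : wanted) {
    if (!contains(existing, name))
      std::printf("add column %s %s\n", name.c_str(),
                  after.empty() ? "first" : ("after " + after).c_str());
    after = name;
  }
}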
+ for (uint32_t i = 0; i < masterIndexes.length(); ++i) { + const Index& index = masterIndexes[i]; + if (index.isDropped()) { + continue; + } + const ColumnIds& ids = index.getColumnIds(); + for (uint32_t j = 0; j < ids.length(); ++j) { + const Str& columnName = masterColumns[ids[j]].getName(); + if (droppedColumns.contains(columnName)) { + indexesToDrop.append(index.getName()); + break; + } + } + } + + // Detect added columns. + AddedColumns addedColumns; + Str iafter; + for (uint32_t i = 0; i < newColumnsByName.length(); ++i) { + const NamedColumn& namedColumn = newColumnsByName[i]; + const Str& name = namedColumn.getKey(); + if (!existingColumns.contains(name)) { + addedColumns.append(AddedColumn(iafter, name)); + } + iafter = name; + } + // Check our detected dropped and added columns are correct: simulate dropping the columns, and adding the new ones, + // then compare the result with the list of columns passed as argument. + if (!droppedColumns.isEmpty() || !addedColumns.isEmpty()) { + Names simulation(existingColumns); + for (uint32_t i = 0; i < droppedColumns.length(); ++i) { + simulation.remove(droppedColumns[i]); + } + for (uint32_t i = 0; i < addedColumns.length(); ++i) { + const AddedColumn& addedColumn = addedColumns[i]; + const Str& after = addedColumn.getFirst(); + const Str& name = addedColumn.getSecond(); + const uint32_t index = after.isEmpty() ? 0 : (simulation.index(after) + 1); + simulation.insertAt(index, name); + } + bool equals = true; + if (simulation.length() == columns.length()) { + for (uint32_t i = 0; i < simulation.length(); ++i) { + if (simulation[i] != columns[i].getName()) { + equals = false; + break; + } + } + } else { + equals = false; + } + // If the result of our simulation was successful, generate the SQL queries corresponding to these changes. + // Start by the dropped indexes, then the dropped columns. Finally create the new columns. 
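Editor's note: the generation step that follows builds one "alter table" statement per group of changes (dropped indexes, dropped columns, added columns) and joins the groups with ';'. A compressed standalone sketch of that clause assembly, folded into a single statement for brevity (identifiers and clauses are illustrative):

#include <cstdio>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> droppedIndexes = {"index_2"};
  const std::vector<std::string> droppedColumns = {"bytes"};
  const std::vector<std::string> addedColumns   = {"add column `pkts` int after `dst`"};

  std::string sql = "alter table `db`.`t` ";
  bool first = true;
  auto append = [&](const std::string& clause) {
    if (!first) sql += ", ";
    sql += clause;
    first = false;
  };
  for (const auto& name : droppedIndexes) append("drop index `" + name + "`");
  for (const auto& name : droppedColumns) append("drop column `" + name + "`");
  for (const auto& clause : addedColumns) append(clause);
  std::printf("%s\n", sql.c_str());
}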
+ if (equals) { + bool first = true; + for (uint32_t i = 0; i < indexesToDrop.length(); ++i) { + const Str& name = indexesToDrop[i]; + if (first) { + snprintf(tmp, sizeof(tmp), "; alter table `%s`.`%s` ", database, table); + s += Str(tmp); + } else { + s += Str(", "); + } + first = false; + snprintf(tmp, sizeof(tmp), "drop index `%s`", name.c_str()); + s += Str(tmp); + } + first = true; + for (uint32_t i = 0; i < droppedColumns.length(); ++i) { + const Str& name = droppedColumns[i]; + if (first) { + snprintf(tmp, sizeof(tmp), "; alter table `%s`.`%s` ", database, table); + s += Str(tmp); + } else { + s += Str(", "); + } + first = false; + snprintf(tmp, sizeof(tmp), "drop column `%s`", name.c_str()); + s += Str(tmp); + } + first = true; + for (uint32_t i = 0; i < addedColumns.length(); ++i) { + const AddedColumn& addedColumn = addedColumns[i]; + if (first) { + snprintf(tmp, sizeof(tmp), "; alter table `%s`.`%s` ", database, table); + s += Str(tmp); + } else { + s += Str(", "); + } + first = false; + const Str& after = addedColumn.getFirst(); + const NamedColumn key(addedColumn.getSecond()); + const uint32_t index = newColumnsByName.index(key); + const Str definition = newColumnsByName[index].getValue().getDefinition(); + Str position; + if (after.isEmpty()) { + position = Str("first"); + } else { + snprintf(tmp, sizeof(tmp), "after `%s`", after.c_str()); + position = Str(tmp); + } + snprintf(tmp, sizeof(tmp), "add column %s %s", definition.c_str(), position.c_str()); + s += Str(tmp); + } + } + } + } + } + while (true) { + // If the table does not exists, start by creating it officially through MySQL, using DDL (Data Definition Language) + if (create) { + MySQLGuard mysql(username, password); + snprintf(tmp, sizeof(tmp), "create database if not exists `%s`" + " default character set utf8mb4 collate utf8mb4_0900_ai_ci", database); + mysql.execute(tmp); + snprintf(tmp, sizeof(tmp), "create table if not exists `%s`.`%s` (", database, table); + s = Str(tmp); + bool first = true; + for (uint32_t i = 0; i < columns.length(); ++i) { + const ColumnEx& column = columns[i]; + if (first) { + first = false; + } else { + s += Str(", "); + } + const ForeignKey* fk = 0; + for (uint32_t j = 0; j < foreignKeys.length(); ++j) { + const ForeignKey& foreignKey = foreignKeys[j]; + if (static_cast(foreignKey.getColumnId()) == i) { + fk = &foreignKey; + break; + } + } + s += column.getDefinition(); + if (fk != 0) { + snprintf(tmp, sizeof(tmp), " references `%s`.`%s`(`%s`)", fk->getDatabaseName().isEmpty() ? database : fk->getDatabaseName().c_str(), + fk->getTableName().c_str(), fk->getColumnName().c_str()); + s += Str(tmp); + } + } + for (uint32_t i = 0; i < indexes.length(); ++i) { + const Index& index = indexes[i]; + snprintf(tmp, sizeof(tmp), ", %skey `index_%u` (", index.isUnique() ? "unique " : "", i); + s += Str(tmp); + first = true; + const ColumnIds& ids = index.getColumnIds(); + for (uint32_t j = 0; j < ids.length(); ++j) { + const ColumnEx& column = columns[ids[j]]; + snprintf(tmp, sizeof(tmp), "%s`%s`", first ? "" : ", ", column.getName().c_str()); + first = false; + s += Str(tmp); + } + s += Str(")"); + } + s += Str(") engine=sparrow"); + } + if (!s.isEmpty()) { + if (create) { + spw_print_information("Creating table %s.%s : %s", database, table, s.c_str()); + } else { + spw_print_information("Definition of table %s.%s has changed: %s", database, table, s.c_str()); + } + char* dummy; + char* token = my_strtok_r(const_cast(s.c_str()), ";", &dummy); + // Execute each SQL statement separately. 
SQL statements are separated by ';' + while (token != 0) { + MySQLGuard mysql(username, password); + mysql.execute(token); + token = my_strtok_r(0, ";", &dummy); + } + } + + // Update what cannot be changed through DDL: columns, foreign keys and DNS. + if (update(username, password, database, table, columns, indexes, foreignKeys, dnsConfiguration, aggregationPeriod, + defaultWhere, stringOptimization, maxLifetime, coalescingPeriod)) { + break; + } else { + // Retry creation in case the table has been dropped or renamed because it is incompatible. + create = true; + } + } +} + +// Update table if necessary. +// STATIC +bool InternalApi::update(const char* username, const char* password, const char* database, const char* table, + const ColumnExs& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, const DnsConfiguration& dnsConfiguration, + const uint32_t aggregationPeriod, const uint64_t defaultWhere, const uint64_t stringOptimization, + const uint64_t maxLifetime, const uint64_t coalescingPeriod) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::update"); + bool incompatible = false; + { + MasterKeepAlive master = InternalApi::get(database, table, false, false, 0); + Indexes addedIndexes(indexes.length()); + Indexes droppedIndexes; + Columns masterColumns; + SYSvector ids; // Ids used by index names, if index names are formatted as "index_xxx". + { + WriteGuard guard(master->getLock()); + + // Update indexes so they reference the actual columns (take care of dropped columns). + for (uint32_t i = 0; i < indexes.length(); ++i) { + const Index& index = indexes[i]; + const Index newIndex(index.getName().c_str(), master->updateColumnIds(index.getColumnIds(), columns), index.isUnique()); + addedIndexes.append(newIndex); + } + + // Check column definitions are compatible. + if (master->compareColumns(columns)) { + check(columns, dnsConfiguration); + + // Update column definitions. + master->updateColumns(columns); + masterColumns = master->getColumns(); + + // Check index changes. + const Indexes& masterIndexes = master->getIndexes(); + for (uint32_t i = 0; i < masterIndexes.length(); ++i) { + const Index& masterIndex = masterIndexes[i]; + if (!masterIndex.isDropped() && !addedIndexes.remove(masterIndex)) { + droppedIndexes.append(masterIndex); + } else { + uint32_t id = 0; + if (sscanf(masterIndex.getName().c_str(), "index_%u", &id) == 1) { + ids.append(id); + } + } + } + // Make sure indexes have distinct names. + for (uint32_t i = 0; i < addedIndexes.length(); ++i) { + uint32_t id = 0; + Index& addedIndex = addedIndexes[i]; + if (sscanf(addedIndex.getName().c_str(), "index_%u", &id) == 1) { + if (ids.contains(id)) { + id = 0; + while (ids.contains(id)) { + id++; + } + char buffer[128]; + snprintf(buffer, sizeof(buffer), "index_%u", id); + addedIndex.setName(Str(buffer)); + } + ids.append(id); + } + } + master->setForeignKeys(foreignKeys); + master->setAggregationPeriod(aggregationPeriod); + master->setDefaultWhere(defaultWhere); + master->setStringOptimization(stringOptimization); + master->setMaxLifetime(maxLifetime); + master->setCoalescingPeriod(coalescingPeriod); + master->toDisk(); + } else { + incompatible = true; + } + } + + if (!incompatible) { + // Start coalescing in case coalescing period changed. + master->coalesce(); + + // Alter indexes if necessary. 
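Editor's note: just above, update() keeps the generated index names unique: a new index named index_<n> whose id is already in use is renamed to the smallest free id. A standalone sketch of that rule (the used-id list is illustrative):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> used = {0, 1, 3};
  unsigned id = 1;  // requested id, already taken
  if (std::find(used.begin(), used.end(), id) != used.end()) {
    id = 0;
    while (std::find(used.begin(), used.end(), id) != used.end()) ++id;  // smallest free id
  }
  used.push_back(id);
  std::printf("renamed to index_%u\n", id);  // index_2
}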
+ if (!addedIndexes.isEmpty() || !droppedIndexes.isEmpty()) { + MySQLGuard mysql(username, password); + char buffer[2048]; + snprintf(buffer, sizeof(buffer), "alter table `%s`.`%s` ", database, table); + Str sql(buffer); + bool first = true; + for (uint32_t i = 0; i < droppedIndexes.length(); ++i) { + snprintf(buffer, sizeof(buffer), "%sdrop index `%s`", (first ? "" : ", "), droppedIndexes[i].getName().c_str()); + sql += Str(buffer); + first = false; + } + for (uint32_t i = 0; i < addedIndexes.length(); ++i) { + const Index& addedIndex = addedIndexes[i]; + snprintf(buffer, sizeof(buffer), "%sadd index `%s`", (first ? "" : ", "), addedIndex.getName().c_str()); + sql += Str(buffer); + const ColumnIds& columnIds = addedIndex.getColumnIds(); + for (uint32_t j = 0; j < columnIds.length(); ++j) { + const Column& column = masterColumns[columnIds[j]]; + snprintf(buffer, sizeof(buffer), "%s`%s`%s", (j == 0 ? "(" : ", "), + column.getName().c_str(), (j + 1 == columnIds.length() ? ")" : "")); + sql += Str(buffer); + } + first = false; + } + mysql.execute(sql.c_str()); + } + + // Setup DNS configuration. + TransientPartitions partitions; + DnsConfigurationGuard dnsGuard; + { + WriteGuard guard(master->getLock()); + partitions = master->setDnsConfiguration(dnsConfiguration); + dnsGuard = master->getDnsConfiguration(); + master->toDisk(); + } + for (uint32_t i = 0; i < partitions.length(); ++i) { + partitions[i]->updateDnsConfiguration(dnsGuard.get()); + } + } + } + + // If the table already exists but does not fit existing definition: drop it or rename it. + // Take care to do it with the MasterKeepAlive released, otherwise table deletion will block. + if (incompatible) { + MySQLGuard mysql(username, password); + char buffer[2048]; + if (sparrow_incompatible_table == 0) { + spw_print_warning("Table %s.%s is incompatible with new definition: drop table", database, table); + snprintf(buffer, sizeof(buffer), "drop table if exists `%s`.`%s`", database, table); + } else { + time_t ts = std::time(nullptr); + struct tm t; + gmtime_r(&ts, &t); + char newTable[1024]; + int l = snprintf(newTable, sizeof(newTable), "%s", table); + if (l == 0 || strftime(newTable + l, sizeof(newTable) - l, "%Y%m%d%H%M%S", &t) == 0) { + throw SparrowException::create(false, "Cannot rename table %s.%s; internal error.", database, table); + } + spw_print_warning("Table %s.%sis incompatible with new definition: rename table to %s.%s", database, table, database, newTable); + snprintf(buffer, sizeof(buffer), "rename table `%s`.`%s` to `%s`.`%s`", database, table, database, newTable); + } + mysql.execute(buffer); + snprintf(buffer, sizeof(buffer), "Table %s.%s has been %s", database, table, (sparrow_incompatible_table == 0 ? 
"dropped" : "renamed")); + return false; + } else { + return true; + } +} + +// STATIC +void InternalApi::check(const ColumnExs& columns, const DnsConfiguration& dnsConfiguration) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::check"); + bool hasDnsIdentifier = false; + bool hasReverseDns = false; + for (uint32_t i = 0; i < columns.length(); ++i) { + const Column& column = columns[i]; + if (column.isFlagSet(COL_IP_LOOKUP)) { + if (column.getType() != COL_STRING) { + throw SparrowException::create(false, "Column \"%s\" must be a string since it contains ip address lookups", + column.getName().c_str()); + } + const uint32_t index = column.getInfo(); + if (index >= columns.length()) { + throw SparrowException::create(false, "Cannot find ip address column referenced by lookup column \"%s\"", + column.getName().c_str()); + } + const Column& ip = columns[index]; + if (ip.getType() != COL_BLOB || !ip.isFlagSet(COL_IP_ADDRESS)) { + throw SparrowException::create(false, "Column \"%s\" contains reverse ip lookups of values from column \"%s\", but this column does not contain ip addresses", + column.getName().c_str(), ip.getName().c_str()); + } + if (column.isFlagSet(COL_NULLABLE) && !ip.isFlagSet(COL_NULLABLE)) { + throw SparrowException::create(false, "Nullable column \"%s\" contains ip lookups of values from column \"%s\", but this column is not nullable", + column.getName().c_str(), ip.getName().c_str()); + } + hasReverseDns = true; + } + if (column.isFlagSet(COL_DNS_IDENTIFIER)) { + if (hasDnsIdentifier) { + throw SparrowException::create(false, "Only one column can be the DNS identifier"); + } + hasDnsIdentifier = true; + } + } + if (hasReverseDns && !dnsConfiguration.isEmpty()) { + if (!hasDnsIdentifier && (dnsConfiguration.entries() > 1 || !dnsConfiguration.contains(DnsConfigId(-1)))) { + throw SparrowException::create(false, "At least one column contains ip lookups and there is no DNS identifier column - in this case, the DNS configuration can contain only the wildcard identifier"); + } + } +} + +// Gets a master file. +// STATIC +MasterKeepAlive InternalApi::get(const char* database, const char* table, + const bool create, const bool remove, TABLE_SHARE* s) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::get"); + Guard guard(hashLock_); + const Master key(database, table, true); + Master* master = hash_.find(&key); + const bool inHash = master != 0; + if (master == 0 && create) { + // Not found in memory: maybe on disk? + master = Master::fromDisk(database, table, s); + } + if (master == 0) { + if (create) { + // Not found in memory nor on disk: create it in memory. + DBUG_PRINT("sparrow_api", ("Create new master %s.%s", database, table)); + master = new Master(database, table, false); + } else { + throw SparrowException::create(false, "Cannot find master file for table %s.%s", database, table); + } + } + if (remove) { + if (inHash) { + DBUG_PRINT("sparrow_api", ("Remove master %s.%s from hash", database, table)); + hash_.remove(master); + } + } else if (!inHash && master != 0) { + DBUG_PRINT("sparrow_api", ("Insert master %s.%s into hash", database, table)); + hash_.insert(master); + + // Start alterations if necessary. + master->startIndexAlter(false); + + // Start coalescing if necessary. + master->coalesce(); + } + return MasterKeepAlive(master); +} + +// Gets all master files currently in the hash table. 
+// STATIC +Masters InternalApi::getAll() { + SPARROW_ENTER("InternalApi::getAll"); + Guard guard(hashLock_); + Masters masters(hash_.entries()); + SYSpHashIterator iterator(hash_); + while (++iterator) { + masters.insert(MasterKeepAlive(iterator.key())); + } + return masters; +} + +// Flushes the transient partition of all master files. +// STATIC +void InternalApi::flushAll(const bool shutdown) { + SPARROW_ENTER("InternalApi::getAll"); + + // If a InternalApi::flushAll() is already being processed somewhere in another thread + // drop this one + if (!Atomic::cas32(&InternalApi::flushOnGoing_, 0, 1)) { + return; + } + // If transient partitions are already being flushed, no need to force another "flush all" job + if ( TransientPartition::getNbFlushs() > 0 ) { + Atomic::dec32(&InternalApi::flushOnGoing_); + return; + } + + Masters masters = InternalApi::getAll(); + bool flush = false; + for (uint32_t i = 0; i < masters.length(); ++i) { + if (masters[i]->forceFlush()) { + flush = true; + } + } + + Atomic::dec32(&InternalApi::flushOnGoing_); + + if (shutdown) { + if (flush) { + spw_print_information("Sparrow is flushing transient data to disk..."); + } + TransientPartition::waitForFlushs(); + } +} + +// STATIC +// Stops the pending coalescing tasks +void InternalApi::StopCoalescingTasks(const char* schema) { + bool all = (schema == NULL || strlen(schema) == 0); + Masters masters = InternalApi::getAll(); + const uint32_t nbMasters = masters.length(); + for (uint32_t i=0; igetLock()); + master->rename(newDatabase, newTable); + } + + // Put the master file back in the hash (hash code changed). + Guard guard(hashLock_); + hash_.insert(master.get()); +} + +// Writes data to a Sparrow table. +// STATIC +void InternalApi::write(const char* database, const char* table, ByteBuffer& buffer, const uint32_t rows) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::write"); + DBUG_PRINT("sparrow_api", ("Inserting %u rows into table %s.%s", rows, database, table)); + + // Find master file for given table. + MasterKeepAlive master = InternalApi::get(database, table, false, false, 0); + uint64_t timestamp = 0; + while (true) { + if (TransientPartition::waitForRoom(master.isStopping())) { + // Get the current transient partition, or create a new one if necessary. + TransientPartitionGuard partition = master->getTransientPartition( timestamp ); + + // Insert data. + if (partition->insert(buffer, rows, timestamp)) { + // Insertion succeeded. + break; + } + } else { + throw SparrowException::create(false, "Insertion aborted because table %s.%s is being deleted", database, table); + } + // The partition was full or a timestamp was out of the coalescing period: retry. + } +} + +// Writes data to a Sparrow table using a subset of columns. +// STATIC +void InternalApi::write(const char* database, const char* table, const Names& colNames, ByteBuffer& buffer, const uint32_t rows) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::write"); + DBUG_PRINT("sparrow_api", ("Inserting %u rows into table %s.%s on a selection of %u columns", rows, database, table, colNames.entries())); + + // Find master file for given table. + MasterKeepAlive master = InternalApi::get(database, table, false, false, 0); + ColumnIds colIds; + master->getColumnIds(colNames, colIds); + uint64_t timestamp = 0; + while (true) { + if (TransientPartition::waitForRoom(master.isStopping())) { + // Get the current transient partition, or create a new one if necessary. 
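Editor's note: a little further up, InternalApi::flushAll() uses Atomic::cas32 on flushOnGoing_ so that only one "flush all" pass runs at a time; a concurrent caller simply returns. A standalone sketch of that guard with std::atomic (simplified, illustrative):

#include <atomic>
#include <cstdint>
#include <cstdio>

static std::atomic<uint32_t> flushOnGoing{0};

bool flushAll() {
  uint32_t expected = 0;
  // First caller wins the compare-and-swap; others bail out immediately.
  if (!flushOnGoing.compare_exchange_strong(expected, 1)) return false;
  // ... force a flush on every table ...
  flushOnGoing.store(0);  // release the flag
  return true;
}

int main() {
  std::printf("first call ran the flush: %d\n", flushAll());
  std::printf("flag was released, so this call runs too: %d\n", flushAll());
}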
+ TransientPartitionGuard partition = master->getTransientPartition( timestamp ); + + // Insert data. + if (partition->insert(buffer, rows, colNames, colIds, timestamp)) { + // Insertion succeeded. + break; + } + } else { + throw SparrowException::create(false, "Insertion aborted because table %s.%s is being deleted", database, table); + } + // The partition was full or a timestamp was out of the coalescing period: retry. + } +} + +// STATIC +void InternalApi::removePartitions(const char* database, const char* table, const TimePeriod& period) _THROW_(SparrowException) { + SPARROW_ENTER("InternalApi::removePartitions"); +#ifndef NDEBUG + const Str sPeriod = Str::fromTimePeriod(period); + DBUG_PRINT("sparrow_api", ("Removing partitions in interval %s from table %s.%s", sPeriod.c_str(), database, table)); +#endif + + // Find master file for given table. + MasterKeepAlive master = InternalApi::get(database, table, false, false, 0); + master->removePartitions(period); +} + +// Helper to print a grid. The parameter totalValues, if not null, gives the number of trailing values +// to be printed as a total on the last line. +// STATIC +void InternalApi::printGrid(PrintBuffer& buffer, const SYSvector& headers, SYSslist& values, const uint32_t totalValues) { + SYSslist strings; + SYSarray lengths(headers.length(), 0); + for (uint32_t i = 0; i < lengths.length(); ++i) { + strings.append(headers[i]); + } + strings.appendAll(values); + SYSslistIterator iterator(strings); + uint32_t i = 0; + while (++iterator) { + uint32_t index = i++ % lengths.length(); + lengths[index] = std::max(lengths[index], iterator.key().length()); + } + + char separator[2048]; + Str totalSeparator; + char* t = separator; + for (i = 0; i < lengths.length(); ++i) { + *t++ = '+'; + if (totalValues != 0 && i == totalValues) { + totalSeparator = Str(separator, static_cast(t - separator)); + } + for (int j = 0; j < lengths[i] + 2; ++j) { + *t++ = '-'; + } + } + *t++ = '+'; + if (totalValues != 0 && i == totalValues) { + totalSeparator = Str(separator, static_cast(t - separator)); + } + *t++ = 0; + + // Header + iterator.reset(); + buffer << separator << "\n"; + int pos = 0; + for (i = 0; i < lengths.length(); ++i) { + ++iterator; + ++pos; + const Str& s = iterator.key(); + buffer << "| " << s; + for (int j = 0; j < lengths[i] - s.length(); ++j) { + buffer << " "; + } + buffer << " "; + } + buffer << "|\n" << separator << "\n"; + i = 0; + while (++iterator) { + const Str& s = iterator.key(); + buffer << "| " << s; + for (int j = 0; j < lengths[i] - s.length(); ++j) { + buffer << " "; + } + buffer << " "; + i++; + ++pos; + if (i == lengths.length()) { + buffer << "|\n"; + if (totalValues > 0 && pos == static_cast(strings.entries() - totalValues)) { + buffer << separator << "\n"; + } + i = 0; + } + } + if (totalValues == 0) { + buffer << separator << "\n"; + } else { + if (totalValues < headers.length()) { + buffer << "|\n"; + } + buffer << totalSeparator << "\n"; + } +} + +// STATIC +void InternalApi::reportAlterStatus(PrintBuffer& buffer, const SortedMasters& masters) { + SPARROW_ENTER("InternalApi::reportAlterStatus"); + SYSslist strings; + bool isAltering = false; + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard guard(master.getLock()); + if (master.getIndexAlterStatus(strings)) { + isAltering = true; + } + } + if (isAltering) { + const char* h[] = { "Table", "Alteration", "Elapsed", "Left", "Progress" }; + SYSvector headers(sizeof(h) / sizeof(h[0])); + for (uint32_t i = 0; i < 
headers.capacity(); ++i) {
+      headers.append(Str(h[i]));
+    }
+    buffer << "\nOngoing table alterations:\n\n";
+    InternalApi::printGrid(buffer, headers, strings, 0);
+  }
+}
+
+// STATIC
+void InternalApi::reportCoalescingStatus(PrintBuffer& buffer, const SortedMasters& masters) {
+  SPARROW_ENTER("InternalApi::reportCoalescingStatus");
+  SYSslist strings;
+  const char* h[] = { "Table", "Coalescing Period", "Progress" };
+  SYSvector headers(sizeof(h) / sizeof(h[0]));
+  for (uint32_t i = 0; i < headers.capacity(); ++i) {
+    headers.append(Str(h[i]));
+  }
+  char tmp[1024];
+  for (uint32_t i = 0; i < masters.length(); ++i) {
+    const Master& master = *masters[i];
+    ReadGuard masterGuard(master.getLock());
+    if (master.getNewest(true) == 0) {
+      continue;
+    }
+    snprintf(tmp, sizeof(tmp), "%s.%s", master.getDatabase().c_str(), master.getTable().c_str());
+    strings.append(Str(tmp));
+    strings.append(Str::fromDuration(master.getCoalescingPeriod()));
+    const double coalescingPercentage = master.getCoalescingPercentage();
+    if (coalescingPercentage < 0) {
+      strings.append(Str("N/A"));
+    } else {
+      snprintf(tmp, sizeof(tmp), "%.1f%%", coalescingPercentage);
+      strings.append(Str(tmp));
+    }
+  }
+  buffer << "\nCoalescing status:\n\n";
+  InternalApi::printGrid(buffer, headers, strings, 0);
+}
+
+// Report status of Sparrow tables.
+// STATIC
+void InternalApi::report(PrintBuffer& buffer) _THROW_(SparrowException) {
+  SPARROW_ENTER("InternalApi::report");
+  SortedMasters masters = InternalApi::getAll();
+  if (masters.isEmpty()) {
+    buffer << "\nThere are no Sparrow tables.\n";
+    return;
+  }
+  buffer << "\nSparrow tables:\n\n";
+  const char* h[] = { "Name", "Data", "Index", "Total", "Period", "Partitions", "Files", "Records", "Average", "Oldest", "Newest", "Lifetime (max)", "Avg row count" };
+  SYSvector headers(sizeof(h) / sizeof(h[0]));
+  for (uint32_t i = 0; i < headers.capacity(); ++i) {
+    headers.append(Str(h[i]));
+  }
+  char tmp[1024];
+  uint64_t totalData = 0;
+  uint64_t totalIndex = 0;
+  uint64_t projected = 0; // Projected total size.
+  uint64_t throughput = 0; // Bytes per day.
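Editor's note: printGrid(), defined a little above, renders these report tables: each column is as wide as its longest cell, and rows are framed with '+', '-' and '|'. A standalone sketch of the same width computation and framing (plain std::string, illustrative data):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> headers = {"Table", "Records"};
  const std::vector<std::vector<std::string>> rows = {{"testdb.flows", "123456"}, {"testdb.dns", "42"}};

  // Column width = longest cell in that column.
  std::vector<size_t> width(headers.size());
  for (size_t c = 0; c < headers.size(); ++c) {
    width[c] = headers[c].size();
    for (const auto& r : rows) width[c] = std::max(width[c], r[c].size());
  }
  auto separator = [&] {
    for (size_t c = 0; c < width.size(); ++c) std::printf("+%s", std::string(width[c] + 2, '-').c_str());
    std::printf("+\n");
  };
  auto line = [&](const std::vector<std::string>& cells) {
    for (size_t c = 0; c < cells.size(); ++c) std::printf("| %-*s ", (int)width[c], cells[c].c_str());
    std::printf("|\n");
  };
  separator(); line(headers); separator();
  for (const auto& r : rows) line(r);
  separator();
}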
+ SYSslist strings; + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard masterGuard(master.getLock()); + const uint32_t partitions = master.getPartitions().length(); + if (partitions > 0) { + snprintf(tmp, sizeof(tmp), "%s.%s", master.getDatabase().c_str(), master.getTable().c_str()); + strings.append(Str(tmp)); + strings.append(Str::fromSize(master.getDataSize())); + totalData += master.getDataSize(); + strings.append(Str::fromSize(master.getIndexSize())); + totalIndex += master.getIndexSize(); + const uint64_t size = master.getDataSize() + master.getIndexSize(); + strings.append(Str::fromSize(size)); + snprintf(tmp, sizeof(tmp), "%u", master.getAggregationPeriod()); + strings.append(Str(tmp)); + snprintf(tmp, sizeof(tmp), "%u", partitions); + strings.append(Str(tmp)); + snprintf(tmp, sizeof(tmp), "%u", 1 + master.getIndexMappings().length()); + strings.append(Str(tmp)); + const uint64_t persistentRecords = master.getRecords(); + const uint64_t transientRecords = master.getTransientRecords(); + const uint64_t records = persistentRecords + transientRecords; + snprintf(tmp, sizeof(tmp), "%llu", static_cast(records)); + strings.append(Str(tmp)); + if (records == 0 || persistentRecords == 0) { + strings.append(Str("N/A")); + } else { + snprintf(tmp, sizeof(tmp), "%llu", static_cast(size / persistentRecords)); + strings.append(Str(tmp)); + } + const uint64_t oldest = master.getOldest(); + strings.append(oldest == 0 ? Str("N/A") : Str::fromTimestamp(oldest)); + const uint64_t newest = master.getNewest(); + strings.append(newest == 0 ? Str("N/A") : Str::fromTimestamp(newest)); + const uint64_t lifetime = master.getAge(); + const Str slifetime(Str::fromDuration(lifetime)); + const uint64_t maxLifetime = master.getMaxLifetime(); + const Str smaxLifetime(Str::fromDuration(maxLifetime)); + snprintf(tmp, sizeof(tmp), "%s (%s)", slifetime.c_str(), smaxLifetime.c_str()); + strings.append(Str(tmp)); + uint64_t aggLifetime; + if (master.getAggregationPeriod() == 0) { + aggLifetime = lifetime; + } else { + uint64_t aggregationPeriod = static_cast(master.getAggregationPeriod())*1000; + uint64_t oldest_rounded = (oldest/aggregationPeriod)*aggregationPeriod; + uint64_t newest_rounded = (newest/aggregationPeriod)*aggregationPeriod; + uint64_t lifetime_rounded = newest_rounded - oldest_rounded; + aggLifetime = ((lifetime_rounded/(master.getAggregationPeriod()*1000))+1)*master.getAggregationPeriod()*1000; + } + + if (aggLifetime == 0) { + strings.append(Str("N/A")); + } else { + uint64_t avgRowCount = records*master.getAggregationPeriod()*1000/aggLifetime; + snprintf(tmp, sizeof(tmp), "%llu", static_cast(avgRowCount)); + strings.append(Str(tmp)); + } + if (aggLifetime != 0) { + const uint64_t tableSize = master.getDataSize() + master.getIndexSize(); + double ratio = static_cast(maxLifetime) / aggLifetime; + projected += static_cast(ratio * tableSize); + ratio = 86400000.0 / aggLifetime; + throughput += static_cast(ratio * tableSize); + } + } + } + strings.append(Str("TOTAL")); + strings.append(Str::fromSize(totalData)); + strings.append(Str::fromSize(totalIndex)); + if (sparrow_max_disk_size == 0) { + strings.append(Str::fromSize(totalData + totalIndex)); + } else { + snprintf(tmp, sizeof(tmp), "%s (%s)", Str::fromSize(totalData + totalIndex).c_str(), + Str::fromSize(sparrow_max_disk_size).c_str()); + strings.append(Str(tmp)); + } + InternalApi::printGrid(buffer, headers, strings, 4); + + if (projected != 0) { + buffer << "\nProjected total size is " << 
Str::fromSize(projected) << ". Write throughput is " << Str::fromSize(throughput) << " per day.\n"; + } + + // Alteration status of all tables. + InternalApi::reportAlterStatus(buffer, masters); + + // Coalescing status of all tables. + InternalApi::reportCoalescingStatus(buffer, masters); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MySQLGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +MySQLGuard::MySQLGuard(const char* username, const char* password) _THROW_(SparrowException) + : result_(0) { + mysql_ = mysql_init(0); + if (mysql_ == NULL) { + throw SparrowException::create(false, "Failed to initialize connection handler."); + } + + uint protocol = MYSQL_PROTOCOL_TCP; + mysql_options(mysql_, MYSQL_OPT_PROTOCOL, &protocol); + + // Use big timeouts because some operations may be long (e.g. drop Sparrow table + // may require deleting a lot of files.) + uint big = 86400; + mysql_options(mysql_, MYSQL_OPT_READ_TIMEOUT, &big); + mysql_options(mysql_, MYSQL_OPT_WRITE_TIMEOUT, &big); + if (mysql_real_connect(mysql_, 0, username, password, 0, mysqld_port, 0, 0) == 0) { + MySQLGuard::check(mysql_, 0); + } +} + +void MySQLGuard::execute(const char* stmt) _THROW_(SparrowException) { + SPARROW_ENTER("MySQLGuard::execute"); + DBUG_PRINT("sparrow_api", ("Statement: %s", stmt)); + if (mysql_real_query(mysql_, stmt, (uint)strlen(stmt)) != 0) { + MySQLGuard::check(mysql_, stmt); + } + clear(); + result_ = mysql_store_result(mysql_); +} + +MySQLGuard::~MySQLGuard() { + clear(); + mysql_close(mysql_); +} + +void MySQLGuard::clear() { + if (result_ != 0) { + mysql_free_result(result_); + result_ = 0; + } +} + + +// STATIC +void MySQLGuard::check(MYSQL* mysql, const char* stmt) _THROW_(SparrowException) { + const char* sqlError = mysql_error(mysql); + unsigned int err_code = mysql_errno(mysql); + const char* msg = "unknown error"; + if (sqlError == 0 || strlen(sqlError) == 0) { + uint e = mysql->net.last_errno; + if (e != 0) { + msg = ER_THD(current_thd, e); + } + } else { + msg = sqlError; + } + + if (stmt == 0) { + SparrowException e = SparrowException::create(false, "Cannot connect: %u, %s", err_code, msg); + e.set_err_code( err_code ); + throw e; + } else { + // Truncate statement to 255 chars if necessary. + char tstmt[256]; + strncpy(tstmt, stmt, sizeof(tstmt)); + tstmt[sizeof(tstmt) - 1] = 0; + SparrowException e = SparrowException::create(false, "Cannot execute \"%s\": %u, %s", tstmt, err_code, msg); + e.set_err_code( err_code ); + throw e; + } +} + +} diff --git a/storage/sparrow/engine/internalapi.h b/storage/sparrow/engine/internalapi.h new file mode 100644 index 000000000000..1d25604b7e25 --- /dev/null +++ b/storage/sparrow/engine/internalapi.h @@ -0,0 +1,153 @@ +/* + Internal API. 
+*/ + +#ifndef _engine_internalapi_h_ +#define _engine_internalapi_h_ + +#include "types.h" +#include "master.h" + +struct MYSQL; +struct MYSQL_RES; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MySQLGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MySQLGuard { +private: + + MYSQL* mysql_; + MYSQL_RES* result_; + +public: + + MySQLGuard(const char* username, const char* password) _THROW_(SparrowException); + + void execute(const char* stmt) _THROW_(SparrowException); + + MYSQL_RES* get() { + return result_; + } + + ~MySQLGuard(); + + void clear(); + + static void check(MYSQL* mysql, const char* stmt) _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Engines +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Engines { +private: + + uint32_t bits_; + +public: + + Engines(uint32_t bits) : bits_(bits) { + } + + const char* getName(int i) const { + const char* engines[] = { "sparrow", "myisam", "innodb" }; + return engines[i]; + } + + // Table suffix. + const char* getSuffix(int i) const { + const char* suffix[] = { "", "_myisam", "_innodb" }; + return suffix[i]; + } + + int getCount() const { + return 3; + } + + bool isEnabled(int i) const { + return (bits_ & (1 << i)) != 0; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// InternalApi +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class InternalApi { +private: + + // Static hash table of directories. + static SYSpHash hash_; + static Lock hashLock_; + + // Is a flush on going? + static volatile uint32_t flushOnGoing_; + +private: + + static bool update(const char* username, const char* password, const char* database, const char* table, + const ColumnExs& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, const DnsConfiguration& dnsConfiguration, + const uint32_t aggregationPeriod, const uint64_t defaultWhere, const uint64_t stringOptimization, + const uint64_t maxLifetime, const uint64_t coalescingPeriod) _THROW_(SparrowException); + static void check(const ColumnExs& columns, const DnsConfiguration& dnsConfiguration) _THROW_(SparrowException); + + static void reportAlterStatus(PrintBuffer& buffer, const SortedMasters& masters); + static void reportCoalescingStatus(PrintBuffer& buffer, const SortedMasters& masters); + +public: + + // Setup. + static void setup() _THROW_(SparrowException); + + // Initializes table. + static void init(const char* username, const char* password, const char* database, const char* table, + const ColumnExs& columns, const Indexes& indexes, const ForeignKeys& foreignKeys, const DnsConfiguration& dnsConfiguration, + const uint32_t aggregationPeriod, const uint64_t defaultWhere, const uint64_t stringOptimization, + const uint64_t maxLifetime, const uint64_t coalescingPeriod) _THROW_(SparrowException); + + // Writes data. + static void write(const char* database, const char* table, ByteBuffer& buffer, const uint32_t rows) _THROW_(SparrowException); + + // Writes data on a selection of columns. + static void write(const char* database, const char* table, const Names& columns, ByteBuffer& buffer, const uint32_t rows) _THROW_(SparrowException); + + // Removes some partitions. 
+ static void removePartitions(const char* database, const char* table, const TimePeriod& period) _THROW_(SparrowException); + + // Renames a table + static void rename(const char* database, const char* table, + const char* newDatabase, const char* newTable) _THROW_(SparrowException); + + // Gets a master file. + static MasterKeepAlive get(const char* database, const char* table, + const bool create, const bool remove, TABLE_SHARE* s) _THROW_(SparrowException); + + // Gets all master files. + static Masters getAll(); + + // Flushes the transient partition of all master files. + static void flushAll(const bool shutdown); + + // Stops the coalescing + static void StopCoalescingTasks(const char* schema=NULL); + + // Reporting. + static void printGrid(PrintBuffer& buffer, const SYSvector& headers, SYSslist& values, const uint32_t totalValues); + static void report(PrintBuffer& buffer) _THROW_(SparrowException); + + // Debugging. +#ifndef NDEBUG + static bool hashContains(Master* master) { + return hash_.contains(master); + } +#endif +}; + +} + +#endif /* #ifndef _engine_internalapi_h_ */ diff --git a/storage/sparrow/engine/interval.h b/storage/sparrow/engine/interval.h new file mode 100644 index 000000000000..5c493cdda7e0 --- /dev/null +++ b/storage/sparrow/engine/interval.h @@ -0,0 +1,332 @@ +/* + Generic interval. +*/ + +#ifndef _engine_interval_h_ +#define _engine_interval_h_ + +#include "intervaltree.h" + +namespace Sparrow { + +template class Interval : public AbstractInterval { +private: + T lower_; + T upper_; + unsigned int lowerIncluded_:1; + unsigned int upperIncluded_:1; + unsigned int lowerSet_:1; + unsigned int upperSet_:1; + unsigned int pad_:28; + +private: + + // Creates a void (empty) interval. + // This constructor is private and takes a dummy parameter to distinguish from default constructor. + Interval(const bool foo) { + lowerSet_ = false; + upperSet_ = false; + lowerIncluded_ = true; + upperIncluded_ = true; + } + + // Checks whether the given interval info is valid. + static bool check(const T* lower, const bool lowerIncluded, const T* upper, const bool upperIncluded) { + if ((lower == 0 && lowerIncluded) || (upper == 0 && upperIncluded)) { + // Infinite bound cannot be included in interval. + return false; + } + if (lower != 0 && upper != 0) { + if (*lower > *upper) { + // Lower bound cannot be greater than upper bound. + return false; + } else if (!(*upper > *lower)) { + // Bounds are equal. + if (!lowerIncluded || !upperIncluded) { + // Single value with lower or upper bound excluded. + return false; + } + } + } + return true; + } + + // Compare two bounds. + static bool compareBounds(const T* bound1, const T* bound2) { + if (bound1 == 0) { + return bound2 == 0; + } else { + return bound2 != 0 && *bound1 == *bound2; + } + } + + // Compare two upper or lower bounds. 
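+  // A null pointer stands for an infinite bound: it compares as -infinity when 'upper'
+  // is false and as +infinity when 'upper' is true. Returns -1, 0 or 1.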
+ static int compareBounds(const T* bound1, const T* bound2, const bool upper) { + if (bound1 == 0) { + if (bound2 == 0) { + return 0; + } else if (upper) { + return 1; + } else { + return -1; + } + } else if (bound2 != 0) { + if (*bound1 < *bound2) { + return -1; + } else if (*bound2 < *bound1) { + return 1; + } + return 0; + } else if (upper) { + return -1; + } else { + return 1; + } + } + +public: + + Interval() : lowerIncluded_(false), upperIncluded_(false), lowerSet_(false), upperSet_(false) { + } + Interval(const T bound) : lower_(bound), upper_(bound), lowerIncluded_(true), upperIncluded_(true), lowerSet_(true), upperSet_(true) { + } + Interval(const T lower, const T upper) : lower_(lower), upper_(upper), lowerIncluded_(true), upperIncluded_(true), lowerSet_(true), upperSet_(true) { + } + Interval(const T* lower, const T* upper, const bool lowerIncluded, const bool upperIncluded) : lowerIncluded_(lowerIncluded), upperIncluded_(upperIncluded), + lowerSet_(lower != 0), upperSet_(upper != 0) { + if (lower != 0) { + lower_ = *lower; + } + if (upper != 0) { + upper_ = *upper; + } + } + + // Gets lower bound, returns 0 if -infinite. + const T* getLow() const { + return lowerSet_ ? &lower_ : 0; + } + + // Gets upper bound, returns 0 if +infinite. + const T* getUp() const { + return upperSet_ ? &upper_ : 0; + } + + T getLength() const { + return *getUp() - *getLow(); + } + + bool isLowerIncluded() const { + return lowerIncluded_; + } + + bool isUpperIncluded() const { + return upperIncluded_; + } + + bool isVoid() const { + return lowerIncluded_ && upperIncluded_ && !lowerSet_ && !upperSet_; + } + + bool isAll() const { + return !lowerIncluded_ && !upperIncluded_ && !lowerSet_ && !upperSet_; + } + + bool isPoint() const { + return lowerSet_ && upperSet_ && lowerIncluded_ && upperIncluded_ && lower_ == upper_; + } + + bool contains(const Interval& interval) const { + if (interval.isVoid()) { + return true; + } + if (isVoid()) { + return false; + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + if (lowerCmp > 0 || (lowerCmp == 0 && !lowerIncluded_ && interval.lowerIncluded_)) { + return false; + } + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + if (upperCmp < 0 || (upperCmp == 0 && !upperIncluded_ && interval.upperIncluded_)) { + return false; + } + return true; + } + + bool contains(const T& value) const { + if (isVoid()) { + return false; + } + const int lowerCmp = compareBounds(getLow(), &value, false); + if (lowerCmp > 0 || (lowerCmp == 0 && !lowerIncluded_)) { + return false; + } + const int upperCmp = compareBounds(getUp(), &value, true); + if (upperCmp < 0 || (upperCmp == 0 && !upperIncluded_)) { + return false; + } + return true; + } + + bool isAdjacent(const Interval& interval) const { + if (compareBounds(getUp(), interval.getLow()) + && upperIncluded_ == !interval.lowerIncluded_) { + return true; + } + if (compareBounds(getLow(), interval.getUp()) + && lowerIncluded_ == !interval.upperIncluded_) { + return true; + } + return false; + } + + Interval makeIntersection(const Interval& interval) const { + if (isVoid() || interval.isVoid()) { + // One of the interval is void: return void. + return Interval(false); + } else if (isAll()) { + // One of the interval is all: return the other one. + return interval; + } else if (interval.isAll()) { + return *this; + } else if (isAdjacent(interval)) { + // Intervals are adjacent: return void. 
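+      // For example, [1, 5) and [5, 10) touch at 5 but share no value, so their
+      // intersection is empty.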
+ return Interval(false); + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const T* lowerBound = lowerCmp >= 0 ? getLow() : interval.getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? lowerIncluded_ : interval.lowerIncluded_; + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* upperBound = upperCmp >= 0 ? interval.getUp() : getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? interval.upperIncluded_ : upperIncluded_; + if (check(lowerBound, lowerBoundIncluded, upperBound, upperBoundIncluded)) { + return Interval(lowerBound, upperBound, lowerBoundIncluded, upperBoundIncluded); + } else { + // No intersection: return void interval. + return Interval(false); + } + } + + bool intersects(const Interval& interval) const { + if (isVoid() || interval.isVoid()) { + return false; + } else if (isAll() || interval.isAll()) { + return true; + } else if (isAdjacent(interval)) { + return false; + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const T* lowerBound = lowerCmp >= 0 ? getLow() : interval.getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? lowerIncluded_ : interval.lowerIncluded_; + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* upperBound = upperCmp >= 0 ? interval.getUp() : getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? interval.upperIncluded_ : upperIncluded_; + return check(lowerBound, lowerBoundIncluded, upperBound, upperBoundIncluded); + } + + Interval makeUnion(const Interval& interval) const { + // One of the interval is void: return the other one. + if (isVoid()) { + return interval; + } + if (interval.isVoid()) { + return *this; + } + + // One of the interval is all: return all. + if (isAll()) { + return *this; + } + if (interval.isAll()) { + return interval; + } + + // Cannot merge intervals that do not intersect. + if (!intersects(interval) && !isAdjacent(interval)) { + return Interval(false); + } + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + const int upperCmp = compareBounds(getUp(), interval.getUp(), true); + const T* lowerBound = lowerCmp >= 0 ? interval.getLow() : getLow(); + const bool lowerBoundIncluded = lowerCmp >= 0 ? interval.lowerIncluded_ : lowerIncluded_; + const T* upperBound = upperCmp >= 0 ? getUp() : interval.getUp(); + const bool upperBoundIncluded = upperCmp >= 0 ? upperIncluded_ : interval.upperIncluded_; + return Interval(lowerBound, upperBound, lowerBoundIncluded, upperBoundIncluded); + } + + // Returns one or two intervals adjacent to this interval so the union of + // all those intervals represent all values. + // There are two result intervals if and only if !result[1].isVoid(). + void makeNot(Interval* result) const { + if (isAll()) { + result[0] = Interval(false); + result[1] = Interval(false); + } else if (isVoid()) { + result[0] = Interval(); + result[1] = Interval(false); + } else { + const T* lowerBound = getLow(); + const T* upperBound = getUp(); + int index = 0; + if (lowerBound == 0 || upperBound != 0) { + result[index++] = Interval(upperBound, 0, !isUpperIncluded(), false); + } + if (lowerBound != 0 || upperBound == 0) { + result[index] = Interval(0, lowerBound, false, !isLowerIncluded()); + } + } + } + + // For sorting, use the lower bound only. 
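+  // For example, [1, 10] sorts before [2, 3] even though it ends later, and an interval
+  // that is open towards -infinity sorts before any interval with a finite lower bound.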
+ bool operator < (const Interval& interval) const { + const int lowerCmp = compareBounds(getLow(), interval.getLow(), false); + if (lowerCmp != 0) { + return lowerCmp < 0; + } + if (lowerIncluded_) { + return !interval.lowerIncluded_; + } else { + return false; + } + } + + bool operator == (const Interval& interval) const { + return lower_ == interval.lower_ && upper_ == interval.upper_ + && lowerIncluded_ == interval.lowerIncluded_ && upperIncluded_ == interval.upperIncluded_ + && lowerSet_ == interval.lowerSet_ && upperSet_ == interval.upperSet_; + } + + // Implementation of AbstractInterval. + T getMin() const override { + const T* low = getLow(); + if (low == 0) { + return AbstractInterval::getSmallest(); + } else { + return *low; + } + } + + T getMax() const override { + const T* up = getUp(); + if (up == 0) { + return AbstractInterval::getLargest(); + } else { + return *up; + } + } + + int compareTo(const AbstractInterval& right) const override { + if (getMin() == right.getMin()) { + return 0; + } else if (getMin() < right.getMin()) { + return -1; + } else { + return 1; + } + } +}; + +} + +#endif /* #ifndef _engine_interval_h_ */ diff --git a/storage/sparrow/engine/intervaltree.h b/storage/sparrow/engine/intervaltree.h new file mode 100644 index 000000000000..3aff270373e3 --- /dev/null +++ b/storage/sparrow/engine/intervaltree.h @@ -0,0 +1,601 @@ +/* + AbstractInterval tree + */ + +#ifndef _engine_intervaltree_h_ +#define _engine_intervaltree_h_ + +#include "vec.h" +#include + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AbstractInterval +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class AbstractInterval { +public: + + virtual ~AbstractInterval() { + } + virtual T getMin() const = 0; + virtual T getMax() const = 0; + virtual int compareTo(const AbstractInterval& right) const = 0; + + static T getSmallest(); // Smallest possible value. + static T getLargest(); // Largest possible value. 
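+  // These are provided as explicit specializations for each key type actually used;
+  // the uint64_t specializations below return 0 and ULLONG_MAX.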
+}; + +template<> inline uint64_t AbstractInterval::getSmallest() { + return 0; +} + +template<> inline uint64_t AbstractInterval::getLargest() { + return ULLONG_MAX; +} + +template class SimpleInterval : public AbstractInterval { +private: + + const T low_; + const T high_; + +public: + + SimpleInterval(const T& low, const T& high) : low_(low), high_(high) { + } + + T getMin() const override { + return low_; + } + + T getMax() const override { + return high_; + } + + int compareTo(const AbstractInterval& right) const override { + if (getMin() == right.getMin()) { + return 0; + } else if (getMin() < right.getMin()) { + return -1; + } else { + return 1; + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IntervalTreeNode +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class IntervalTree; +template class IntervalTreeNode { + friend class IntervalTree; + +private: + + AbstractInterval* interval_; + T maxHigh_; + bool red_; + IntervalTreeNode* left_; + IntervalTreeNode* right_; + IntervalTreeNode* parent_; + +public: + + IntervalTreeNode(); + IntervalTreeNode(AbstractInterval* interval); + ~IntervalTreeNode(); + AbstractInterval* getInterval(); +}; + +template inline IntervalTreeNode::IntervalTreeNode() { +} + +template inline IntervalTreeNode::IntervalTreeNode(AbstractInterval* interval) + : interval_(interval), maxHigh_(interval->getMax()), left_(0), right_(0), parent_(0) { +} + +template inline IntervalTreeNode::~IntervalTreeNode() { +} + +template inline AbstractInterval* IntervalTreeNode::getInterval() { + return interval_; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IntervalTree +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class IntervalTree { +private: + + IntervalTreeNode* nil_; + IntervalTreeNode* root_; + +private: + + void leftRotate(IntervalTreeNode* node); + void rightRotate(IntervalTreeNode* node); + void insertHelp(IntervalTreeNode* node); + void TreePrintHelper(IntervalTreeNode* node) const; + void fixMaxHigh(IntervalTreeNode* node); + void removeFixUp(IntervalTreeNode* node); + static bool overlap(const T low1, const T high1, const T low2, const T high2); + void findOverlaps(IntervalTreeNode* x, const AbstractInterval& interval, SYSpVector, 256>& intervals) const; + IntervalTreeNode* getSuccessorOf(IntervalTreeNode* node) const; + void deleteNode(IntervalTreeNode* node); + +#ifndef NDEBUG + void checkMaxHighFields(IntervalTreeNode* x) const; + T checkMaxHighFieldsHelper(IntervalTreeNode* y, const T currentHigh, T match) const; + void checkAssumptions() const; + void checkOrder(IntervalTreeNode* x) const; +#endif + +public: + + IntervalTree(); + ~IntervalTree(); + + void remove(const AbstractInterval& interval); + void insert(AbstractInterval* interval); + IntervalTreeNode* find(const AbstractInterval& interval) const; + void findOverlaps(const AbstractInterval& interval, SYSpVector, 256>& intervals) const; + void clear(); + IntervalTreeNode* getMin() const; + IntervalTreeNode* getNext(IntervalTreeNode* node) const; + bool getMin(T& v) const; + bool getMax(T& v) const; +}; + +template inline IntervalTree::IntervalTree() { + nil_ = new IntervalTreeNode(); + nil_->left_ = nil_; + nil_->right_ = nil_; + nil_->parent_ = nil_; + nil_->red_ = false; + nil_->maxHigh_ = AbstractInterval::getSmallest(); + 
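+  // nil_ is the shared sentinel standing in for null children and parents, and root_ is
+  // a dummy header node whose left child is the actual root of the red-black tree.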
nil_->interval_ = new SimpleInterval(AbstractInterval::getSmallest(), AbstractInterval::getSmallest()); + root_ = new IntervalTreeNode(); + root_->parent_ = nil_; + root_->left_ = nil_; + root_->right_ = nil_; + root_->maxHigh_ = AbstractInterval::getLargest(); + root_->interval_ = new SimpleInterval(AbstractInterval::getLargest(), AbstractInterval::getLargest()); + root_->red_ = false; +} + +template inline void IntervalTree::deleteNode(IntervalTreeNode* node) { + if (node->left_ != nil_) { + deleteNode(node->left_); + } + if (node->right_ != nil_) { + deleteNode(node->right_); + } +} + +template inline IntervalTree::~IntervalTree() { + delete root_->interval_; + deleteNode(root_); + delete nil_->interval_; + delete nil_; + delete root_; +} + +template inline void IntervalTree::clear() { + if (root_->left_ != nil_) { + deleteNode(root_->left_); + root_->left_ = nil_; + } +} + +template inline void IntervalTree::leftRotate(IntervalTreeNode* x) { + IntervalTreeNode* y = x->right_; + x->right_ = y->left_; + if (y->left_ != nil_) { + y->left_->parent_ = x; + } + y->parent_ = x->parent_; + if (x == x->parent_->left_) { + x->parent_->left_ = y; + } else { + x->parent_->right_ = y; + } + y->left_ = x; + x->parent_ = y; + x->maxHigh_ = std::max(x->left_->maxHigh_, std::max(x->right_->maxHigh_, x->interval_->getMax())); + y->maxHigh_ = std::max(x->maxHigh_, std::max(y->right_->maxHigh_, y->interval_->getMax())); +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::rightRotate(IntervalTreeNode* y) { + IntervalTreeNode* x = y->left_; + y->left_ = x->right_; + if (nil_ != x->right_) { + x->right_->parent_ = y; + } + x->parent_ = y->parent_; + if (y == y->parent_->left_) { + y->parent_->left_ = x; + } else { + y->parent_->right_ = x; + } + x->right_ = y; + y->parent_ = x; + y->maxHigh_ = std::max(y->left_->maxHigh_, std::max(y->right_->maxHigh_, y->interval_->getMax())); + x->maxHigh_ = std::max(x->left_->maxHigh_, std::max(y->maxHigh_, x->interval_->getMax())); +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::insertHelp(IntervalTreeNode* z) { + z->left_ = nil_; + z->right_ = nil_; + IntervalTreeNode* x = root_->left_; + IntervalTreeNode* y = root_; + while (x != nil_) { + y = x; + if (x->interval_->compareTo(*z->interval_) > 0) { + x = x->left_; + } else { + x = x->right_; + } + } + z->parent_ = y; + if (y == root_ || y->interval_->compareTo(*z->interval_) > 0) { + y->left_ = z; + } else { + y->right_ = z; + } + assert(!nil_->red_); + assert(nil_->maxHigh_ == AbstractInterval::getSmallest()); +} + +template inline void IntervalTree::fixMaxHigh(IntervalTreeNode* x) { + while (x != root_) { + x->maxHigh_ = std::max(x->interval_->getMax(), std::max(x->left_->maxHigh_, x->right_->maxHigh_)); + x = x->parent_; + } +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::insert(AbstractInterval* interval) { + IntervalTreeNode* x = new IntervalTreeNode(interval); + insertHelp(x); + fixMaxHigh(x->parent_); + assert(x != nil_); + x->red_ = true; + while (x->parent_->red_) { + if (x->parent_ == x->parent_->parent_->left_) { + IntervalTreeNode* y = x->parent_->parent_->right_; + if (y->red_) { + x->parent_->red_ = false; + y->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + x = x->parent_->parent_; + } else { + if (x == x->parent_->right_) { + x = x->parent_; + leftRotate(x); + } + x->parent_->red_ = false; + assert(x->parent_->parent_ != nil_); + 
x->parent_->parent_->red_ = true; + rightRotate(x->parent_->parent_); + } + } else { + IntervalTreeNode* y = x->parent_->parent_->left_; + if (y->red_) { + x->parent_->red_ = false; + y->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + x = x->parent_->parent_; + } else { + if (x == x->parent_->left_) { + x = x->parent_; + rightRotate(x); + } + x->parent_->red_ = false; + assert(x->parent_->parent_ != nil_); + x->parent_->parent_->red_ = true; + leftRotate(x->parent_->parent_); + } + } + } + root_->left_->red_ = false; +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline IntervalTreeNode* IntervalTree::find(const AbstractInterval& interval) const { + IntervalTreeNode* x = root_->left_; + while (x != nil_) { + const int cmp = x->interval_->compareTo(interval); + if (cmp > 0) { + x = x->left_; + } else if (cmp < 0) { + x = x->right_; + } else { + return x; + } + } + return 0; +} + +// STATIC +template inline bool IntervalTree::overlap(const T low1, const T high1, const T low2, const T high2) { + if (low1 <= low2) { + return low2 <= high1; + } else { + return low1 <= high2; + } +} + +template inline void IntervalTree::findOverlaps(IntervalTreeNode* x, const AbstractInterval& interval, SYSpVector, 256>& intervals) const { + if (x == nil_) { + return; + } + const T low = interval.getMin(); + if (low > x->maxHigh_) { + return; + } + findOverlaps(x->left_, interval, intervals); + const T high = interval.getMax(); + if (IntervalTree::overlap(low, high, x->interval_->getMin(), x->interval_->getMax())) { + intervals.append(x->interval_); + } + if (high < x->interval_->getMin()) { + return; + } + findOverlaps(x->right_, interval, intervals); +} + +template inline void IntervalTree::findOverlaps(const AbstractInterval& interval, SYSpVector, 256>& intervals) const { + findOverlaps(root_->left_, interval, intervals); +} + +template inline IntervalTreeNode* IntervalTree::getSuccessorOf(IntervalTreeNode* x) const { + IntervalTreeNode* y = x->right_; + if (y != nil_) { + while (y->left_ != nil_) { + y = y->left_; + } + return y; + } else { + y = x->parent_; + while (x == y->right_) { + x = y; + y = y->parent_; + } + if (y == root_) { + return nil_; + } + return y; + } +} +template inline void IntervalTree::removeFixUp(IntervalTreeNode* x) { + IntervalTreeNode* rootLeft = root_->left_; + while (!x->red_ && rootLeft != x) { + if (x == x->parent_->left_) { + IntervalTreeNode* w = x->parent_->right_; + if (w->red_) { + w->red_ = false; + assert(x->parent_ != nil_); + x->parent_->red_ = true; + leftRotate(x->parent_); + w = x->parent_->right_; + } + if (!w->right_->red_ && !w->left_->red_) { + assert(w != nil_); + w->red_ = true; + x = x->parent_; + } else { + if (!w->right_->red_) { + w->left_->red_ = false; + assert(w != nil_); + w->red_ = true; + rightRotate(w); + w = x->parent_->right_; + } + assert(!x->parent_->red_ || w != nil_); + w->red_ = x->parent_->red_; + x->parent_->red_ = false; + w->right_->red_ = false; + leftRotate(x->parent_); + x = rootLeft; + } + } else { + IntervalTreeNode* w = x->parent_->left_; + if (w->red_) { + w->red_ = false; + assert(x->parent_ != nil_); + x->parent_->red_ = true; + rightRotate(x->parent_); + w = x->parent_->left_; + } + if (!w->right_->red_ && !w->left_->red_) { + assert(w != nil_); + w->red_ = true; + x = x->parent_; + } else { + if (!w->left_->red_) { + w->right_->red_ = false; + assert(w != nil_); + w->red_ = true; + leftRotate(w); + w = x->parent_->left_; + } + assert(!x->parent_->red_ || w != nil_); + 
w->red_ = x->parent_->red_; + x->parent_->red_ = false; + w->left_->red_ = false; + rightRotate(x->parent_); + x=rootLeft; + } + } + } + x->red_ = false; +#ifndef NDEBUG + checkAssumptions(); +#endif +} + +template inline void IntervalTree::remove(const AbstractInterval& interval) { + IntervalTreeNode* z = find(interval); + if (z == 0) { + // Not found: do nothing. + return; + } + IntervalTreeNode* y = (z->left_ == nil_ || z->right_ == nil_) ? z : getSuccessorOf(z); + IntervalTreeNode* x = (y->left_ == nil_) ? y->right_ : y->left_; + x->parent_ = y->parent_; + if (root_ == x->parent_) { + root_->left_ = x; + } else { + if (y == y->parent_->left_) { + y->parent_->left_ = x; + } else { + y->parent_->right_ = x; + } + } + if (y != z) { + assert(y != nil_); + y->maxHigh_ = AbstractInterval::getSmallest(); + y->left_ = z->left_; + y->right_ = z->right_; + y->parent_ = z->parent_; + z->left_->parent_ = y; + z->right_->parent_ = y; + if (z == z->parent_->left_) { + z->parent_->left_ = y; + } else { + z->parent_->right_ = y; + } + fixMaxHigh(x->parent_); + if (!y->red_) { + assert(!z->red_ || z != nil_); + y->red_ = z->red_; + removeFixUp(x); + } else { + assert(!z->red_ || z != nil_); + y->red_ = z->red_; + } + z->left_ = nil_; + z->right_ = nil_; + delete z; + } else { + fixMaxHigh(x->parent_); + if (!y->red_) { + removeFixUp(x); + } + y->left_ = nil_; + y->right_ = nil_; + delete y; + } +#ifndef NDEBUG + checkAssumptions(); + assert(find(interval) == 0); +#endif +} + +template inline IntervalTreeNode* IntervalTree::getMin() const { + IntervalTreeNode* x = root_->left_; + if (x == nil_) { + return 0; + } else { + while (x->left_ != nil_) { + x = x->left_; + } + return x; + } +} + +template inline IntervalTreeNode* IntervalTree::getNext(IntervalTreeNode* node) const { + IntervalTreeNode* x = getSuccessorOf(node); + return x == nil_ ? 0 : x; +} + +template inline bool IntervalTree::getMin(T& v) const { + IntervalTreeNode* x = root_->left_; + if (x == nil_) { + return false; + } else { + while (x->left_ != nil_) { + x = x->left_; + } + v = x->interval_->getMin(); + return true; + } +} + +template inline bool IntervalTree::getMax(T& v) const { + if (root_->left_ == nil_) { + return false; + } else { + v = root_->left_->maxHigh_; + return true; + } +} + +#ifndef NDEBUG + +template inline T IntervalTree::checkMaxHighFieldsHelper(IntervalTreeNode* y, const T currentHigh, T match) const { + if (y != nil_) { + match = checkMaxHighFieldsHelper(y->left_, currentHigh, match) ? 1 : match; + assert(y->interval_->getMax() <= currentHigh); + if (y->interval_->getMax() == currentHigh) { + match = 1; + } + match = checkMaxHighFieldsHelper(y->right_, currentHigh, match) ? 
1 : match; + } + return match; +} + +template inline void IntervalTree::checkMaxHighFields(IntervalTreeNode* x) const { + if (x != nil_) { + checkMaxHighFields(x->left_); + assert(checkMaxHighFieldsHelper(x, x->maxHigh_, 0) > 0); + checkMaxHighFields(x->right_); + } +} + +template inline void IntervalTree::checkOrder(IntervalTreeNode* x) const { + if (x != nil_) { + assert(x->left_ == nil_ || x->interval_->compareTo(*x->left_->interval_) > 0); + checkOrder(x->left_); + assert(x->right_ == nil_ || x->interval_->compareTo(*x->right_->interval_) <= 0); + checkOrder(x->right_); + } +} + +template inline void IntervalTree::checkAssumptions() const { + assert(nil_->interval_->getMin() == AbstractInterval::getSmallest()); + assert(nil_->interval_->getMax() == AbstractInterval::getSmallest()); + assert(nil_->maxHigh_ == AbstractInterval::getSmallest()); + assert(root_->interval_->getMin() == AbstractInterval::getLargest()); + assert(root_->interval_->getMax() == AbstractInterval::getLargest()); + assert(root_->maxHigh_ == AbstractInterval::getLargest()); + assert(nil_->red_ == false); + assert(root_->red_ == false); +#if 0 + // This can be very expensive if there are a lot of nodes! + checkMaxHighFields(root_->left_); + checkOrder(root_->left_); +#endif +} + +#endif + +} + +#endif /* #ifndef _engine_intervaltree_h_ */ diff --git a/storage/sparrow/engine/io.cc b/storage/sparrow/engine/io.cc new file mode 100644 index 000000000000..d603b548821e --- /dev/null +++ b/storage/sparrow/engine/io.cc @@ -0,0 +1,699 @@ +/* + IO helpers. +*/ + +#include "io.h" +#include "fileutil.h" +#include "purge.h" + +#include "../engine/log.h" +#include "mysys/mysys_priv.h" + +#ifdef _WIN32 +#include +//extern "C" File my_open_osfhandle(HANDLE handle, int oflag); +//extern "C" struct st_my_file_info* my_file_info; +#endif + +namespace Sparrow { + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IO +////////////////////////////////////////////////////////////////////////////////////////////////////// + +//pthread_key(IOContext*, IOContext::threadKey_); +thread_local IOContext* IOContext::threadKey_{nullptr}; + +uint8_t* IO::trashBuffer_ = 0; + +// STATIC +void IO::initialize() _THROW_(SparrowException) { + IOContext::initialize(); + trashBuffer_ = ByteBuffer::mmap(FileUtil::getPageSize()); +} + +// STATIC +void IOContext::initialize() { + IOContext::threadKey_ = nullptr; +} + +// STATIC +IOContext& IOContext::getContext() { + if (IOContext::threadKey_ == nullptr) { + IOContext::threadKey_ = new IOContext(0); + } + return *IOContext::threadKey_; +} + +// STATIC +IOContext& IOContext::get(const int nbEvents) _THROW_(SparrowException) { + if (IOContext::threadKey_->getNbEvents() < nbEvents) + { + IOContext::threadKey_->destroyEvents(); + IOContext::threadKey_->initEvents(nbEvents); + } + return *IOContext::threadKey_; +} + +// STATIC +void IOContext::destroy() { + if (IOContext::threadKey_ != nullptr) { + delete IOContext::threadKey_; + IOContext::threadKey_ = nullptr; + } +} + +// Opens a file. 
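+// Depending on 'mode', the file is created (truncating any existing file) or opened for
+// reading or updating. Buffering is bypassed where the platform supports it, and when a
+// create or update fails because the parent directory is missing, the directory is
+// created and the open is retried a few times before an exception is thrown.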
+// STATIC +int IO::open(const char* name, const FileMode mode) _THROW_(SparrowException) { + SPARROW_ENTER("IO::initialize"); + int file = -1; + uint32_t retries = 0; + const uint32_t maxRetries = 5; + while (true) { + bool notFound = false; +#ifdef _WIN32 + HANDLE handle; + switch (mode) { + case FILE_MODE_CREATE: { + handle = CreateFile(name, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, CREATE_ALWAYS, + FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH | FILE_FLAG_OVERLAPPED, 0); + break; + } + case FILE_MODE_READ: { + handle = CreateFile(name, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, + FILE_FLAG_NO_BUFFERING | FILE_FLAG_OVERLAPPED, 0); + break; + } + case FILE_MODE_UPDATE: { + handle = CreateFile(name, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, + FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH | FILE_FLAG_OVERLAPPED, 0); + break; + } + default: assert(0); + } + if (handle == INVALID_HANDLE_VALUE) { + notFound = GetLastError() == ERROR_PATH_NOT_FOUND; + } else { + file = my_get_filedescr(handle, 0); + //file = RegisterHandle(handle, 0); + //file = _open_osfhandle((intptr_t)handle, 0); + //file = my_win_open(handle, 0); + if (file == -1) { + CloseHandle(handle); + handle = INVALID_HANDLE_VALUE; // Just to be clean + } + } +#else + switch (mode) { + case FILE_MODE_CREATE: { + file = ::open(name, O_RDWR | O_CREAT | O_TRUNC, my_umask); + break; + } + case FILE_MODE_READ: { + file = ::open(name, O_RDONLY, my_umask); + break; + } + case FILE_MODE_UPDATE: { + file = ::open(name, O_RDWR, my_umask); + break; + } + default: assert(0); + } + if (file == -1) { + notFound = errno == ENOENT; + } +#endif + if (notFound && (mode == FILE_MODE_CREATE || mode == FILE_MODE_UPDATE) && ++retries <= maxRetries) { + FileUtil::createDirectories(name); + } else { + break; + } + } + if (file == -1) { + switch (mode) { + case FILE_MODE_CREATE: throw SparrowException::create(true, "Cannot create file %s", name); + case FILE_MODE_READ: throw SparrowException::create(true, "Cannot open file %s for reading", name); + case FILE_MODE_UPDATE: throw SparrowException::create(true, "Cannot open file %s for updating", name); + default: assert(0); + } + } else { + auto cnv_mode = [mode]() { + file_info::OpenType open_mode{file_info::OpenType::UNOPEN}; + switch (mode) { + case FILE_MODE_CREATE: open_mode = file_info::OpenType::FILE_BY_CREATE; break; + case FILE_MODE_READ: + case FILE_MODE_UPDATE: open_mode = file_info::OpenType::FILE_BY_OPEN; break; + default: assert(0); + } + return open_mode; + }; + file_info::RegisterFilename(file, name, cnv_mode()); + } + // Not required anymore. File counting is done in methods CountFileOpen() in my_static.cc which is called + // file_info::RegisterFilename + //thread_safe_increment(my_file_opened, &THR_LOCK_open); + + + // Try to bypass buffering on Unix. 
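+  // Direct I/O skips the OS page cache; reads and writes in this file keep sector-aligned
+  // sizes, which direct I/O requires. If the file system does not support it, a warning
+  // is printed once and regular buffered I/O is used instead.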
+#ifdef O_DIRECT // Linux or MacOS + static bool directioSupport = true; + if (fcntl(file, F_SETFL, O_DIRECT) == -1) { + if (directioSupport) { + directioSupport = false; + spw_print_warning("Call to fcntl(O_DIRECT) failed: maybe the database file system does not support direct I/O"); + } + } +#elif defined(__SunOS) // Solaris + static bool directioSupport = true; + if (directio(file, DIRECTIO_ON) == -1) { + if (directioSupport) { + directioSupport = false; + spw_print_warning("Call to directio() failed: maybe the database file system does not support direct I/O"); + } + } +#endif + return file; +} + + +// STATIC +void IO::close(File file) +{ + if (file == -1) return; + + // my_close calls file_info::UnregisterFilename() so we don't need to call it here. + my_close(file, MYF(0)); +} + +// Reads a single data block from a file. +// STATIC +uint32_t IO::read(const int file, const char* name, const uint64_t offset, uint8_t* data, const uint32_t size) _THROW_(SparrowException) { + SPARROW_ENTER("IO::read"); + assert(size % FileUtil::getSectorSize() == 0); + if (offset % sparrow_cache_block_size != 0) { + throw SparrowException::create(false, "Seek at invalid offset %llu in file %s", static_cast(offset), name); + } + bool ok = true; + if (size <= sparrow_small_read_block_size) Atomic::inc64(&SparrowStatus::get().ioNbSmall_); + else if (size <= sparrow_medium_read_block_size) Atomic::inc64(&SparrowStatus::get().ioNbMedium_); + else Atomic::inc64(&SparrowStatus::get().ioNbLarge_); +#ifdef _WIN32 + OVERLAPPED* overlapped = IOContext::getOverlapped(); + overlapped->Offset = (DWORD)offset; + overlapped->OffsetHigh = (DWORD)(offset >> 32); + HANDLE handle = my_get_osfhandle(file); + DWORD readBytes; + if (ReadFile(handle, data, size, &readBytes, overlapped) == 0) { + if (GetLastError() != ERROR_IO_PENDING + || GetOverlappedResult(handle, overlapped, &readBytes, true) == 0) { + ok = false; + } + } +#elif defined(__MACH__) + ssize_t readBytes = ::pread(file, data, size, offset); + ok = readBytes != static_cast(-1); +#elif defined(__linux__) + ssize_t readBytes; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(1); + struct iocb** iocb = ctx.getIocb(); + io_prep_pread(iocb[0], file, data, size, offset); + int result = io_submit(ctx.get(), 1, iocb); + if (result == 1) { + ok = true; + } else { + ok = false; + errno = -result; + } + while (ok) { + struct io_event* events = ctx.getEvents(); + result = io_getevents(ctx.get(), 1, 1, events, 0); + if (result == 1) { + if (events->res2 == 0) { + readBytes = events->res; + break; + } else { + errno = -events->res2; + ok = false; + } + } else if (result != -EAGAIN && result != -EINTR) { + errno = -result; + ok = false; + } + } + } else { + readBytes = ::pread64(file, data, size, offset); + ok = readBytes != static_cast(-1); + } +#elif defined(__SunOS) + ssize_t readBytes; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(1); + aiocb64_t* iocb = ctx.getIocb(); + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = LIO_READ; + iocb->aio_fildes = file; + iocb->aio_nbytes = size; + iocb->aio_buf = data; + iocb->aio_offset = offset; + ok = aio_read64(iocb) == 0; + port_event_t* events = ctx.getEvents(); + if (ok) { + ok = port_get(ctx.getPort(), events, 0) == 0; + } + if (ok ){ + iocb = (aiocb64_t*)events->portev_object; + const int check = iocb->aio_resultp.aio_errno; + if (check == 0) { + readBytes = aio_return64(iocb); + } else { + errno = check; + ok = false; + } + } + } else { + readBytes = ::pread64(file, data, size, offset); + ok = readBytes != 
static_cast(-1); + } +#else +#error Platform not supported +#endif + if (!ok) { + throw SparrowException::create(true, "Cannot read file %s at offset %llu", name, static_cast(offset)); + } + return static_cast(readBytes); +} + +// Reads multiple data blocks from a file. +// STATIC +uint32_t IO::readMultiple(const int file, Lock* lock, const char* name, const uint64_t offset, uint8_t** data, const uint32_t size) _THROW_(SparrowException) { + SPARROW_ENTER("IO::readMultiple"); + bool ok = true; + if (size <= sparrow_small_read_block_size) Atomic::inc64(&SparrowStatus::get().ioNbSmall_); + else if (size <= sparrow_medium_read_block_size) Atomic::inc64(&SparrowStatus::get().ioNbMedium_); + else Atomic::inc64(&SparrowStatus::get().ioNbLarge_); +#ifdef _WIN32 + OVERLAPPED* overlapped = IOContext::getOverlapped(); + overlapped->Offset = (DWORD)offset; + overlapped->OffsetHigh = (DWORD)(offset >> 32); + + // Each segment is a page. + const uint32_t pageSize = FileUtil::getPageSize(); + uint32_t pages = (size + pageSize - 1) / pageSize; + uint32_t pagesPerBlock = sparrow_cache_block_size / pageSize; + FILE_SEGMENT_ELEMENT* segments = static_cast(IOContext::getTempBuffer2((pages + 1) * sizeof(FILE_SEGMENT_ELEMENT))); + memset(&segments[pages], 0, sizeof(FILE_SEGMENT_ELEMENT)); + uint32_t block = 0; + for (uint32_t i = 0; i < pages; ++i) { + uint32_t page = i % pagesPerBlock; + if (i != 0 && page == 0) { + block++; + } + uint8_t* d = data[block]; + uint8_t* p = d == 0 ? trashBuffer_ : d + page * pageSize; + segments[i].Buffer = PtrToPtr64(p); + } + HANDLE handle = my_get_osfhandle(file); + DWORD readBytes; + if (ReadFileScatter(handle, segments, size, 0, overlapped) == 0) { + if (GetLastError() != ERROR_IO_PENDING + || GetOverlappedResult(handle, overlapped, &readBytes, true) == 0) { + ok = false; + } + } else { + readBytes = size; + } +#elif defined(__MACH__) + const uint32_t blocks = (size + sparrow_cache_block_size - 1) / sparrow_cache_block_size; + ssize_t readBytes = 0; + if (blocks <= IOV_MAX) { + struct iovec segments[IOV_MAX]; + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + struct iovec& v = segments[i]; + uint8_t* d = data[i]; + v.iov_base = (char*)(d == 0 ? trashBuffer_ : d); + v.iov_len = std::min(length, sparrow_cache_block_size); + length -= sparrow_cache_block_size; + } + + // TODO use preadv when it is supported by redhat + // Linux RedHat does not support preadv so we have to lock the file to avoid concurrent seek and read operations. + Guard guard(*lock); + ok = lseek(file, offset, SEEK_SET) == static_cast(offset); + if (ok) { + readBytes = ::readv(file, segments, blocks); + } + } else { + // Cannot use vectored I/O because there are too many blocks: read into a temporary + // buffer, and copy blocks. 
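+      // IOV_MAX is the upper limit on the number of iovec segments a single readv()
+      // call accepts.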
+ uint8_t* buffer = ByteBuffer::mmap(size); + readBytes = ::pread(file, buffer, size, offset); + if (readBytes > 0) { + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + uint8_t* d = data[i]; + if (d != 0) { + const uint32_t blockSize = std::min(length, sparrow_cache_block_size); + memcpy(d, buffer + i * sparrow_cache_block_size, blockSize); + } + length -= sparrow_cache_block_size; + } + } + ByteBuffer::munmap(buffer, size); + } + ok = readBytes != static_cast(-1); +#elif defined(__linux__) + const uint32_t blocks = (size + sparrow_cache_block_size - 1) / sparrow_cache_block_size; + ssize_t readBytes = 0; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(blocks); + struct iocb** iocb = ctx.getIocb(); + uint32_t length = size; + uint64_t o = offset; + int actual = 0; + for (uint32_t i = 0; i < blocks; ++i) { + uint8_t* d = data[i]; + const uint32_t l = std::min(length, sparrow_cache_block_size); + if (d == 0) { + readBytes += l; + } else { + io_prep_pread(iocb[actual], file, static_cast(d), l, o); + ++actual; + } + length -= sparrow_cache_block_size; + o += sparrow_cache_block_size; + } + int result = io_submit(ctx.get(), actual, iocb); + if (result == actual) { + ok = true; + } else { + ok = false; + errno = -result; + } + int count = 0; + while (ok && count < actual) { + struct io_event* events = ctx.getEvents(); + result = io_getevents(ctx.get(), 1, actual - count, events, 0); + if (result >= 1) { + count += result; + for (int i = 0; i < result; ++i) { + if (events[i].res2 == 0) { + readBytes += events[i].res; + } else { + errno = -events[i].res2; + ok = false; + } + } + } else if (result != -EAGAIN && result != -EINTR) { + errno = -result; + ok = false; + } + } + } else { + if (blocks <= IOV_MAX) { + struct iovec segments[IOV_MAX]; + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + struct iovec& v = segments[i]; + uint8_t* d = data[i]; + v.iov_base = (char*)(d == 0 ? trashBuffer_ : d); + v.iov_len = std::min(length, sparrow_cache_block_size); + length -= sparrow_cache_block_size; + } + + // TODO use preadv when it is supported by redhat + // Linux RedHat does not support preadv so we have to lock the file to avoid concurrent seek and read operations. + Guard guard(*lock); + ok = lseek(file, offset, SEEK_SET) == static_cast(offset); + if (ok) { + readBytes = ::readv(file, segments, blocks); + } + } else { + // Cannot use vectored I/O because there are too many blocks: read into a temporary + // buffer, and copy blocks. 
+ uint8_t* buffer = ByteBuffer::mmap(size); + readBytes = ::pread64(file, buffer, size, offset); + if (readBytes > 0) { + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + uint8_t* d = data[i]; + if (d != 0) { + const uint32_t blockSize = std::min(length, sparrow_cache_block_size); + memcpy(d, buffer + i * sparrow_cache_block_size, blockSize); + } + length -= sparrow_cache_block_size; + } + } + ByteBuffer::munmap(buffer, size); + } + ok = readBytes != static_cast(-1); + } +#elif defined(__SunOS) + const uint32_t blocks = (size + sparrow_cache_block_size - 1) / sparrow_cache_block_size; + ssize_t readBytes = 0; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(blocks); + aiocb64_t* iocb = ctx.getIocb(); + uint32_t length = size; + uint64_t o = offset; + int actual = 0; + for (uint32_t i = 0; i < blocks; ++i) { + uint8_t* d = data[i]; + const uint32_t l = std::min(length, sparrow_cache_block_size); + if (d == 0) { + readBytes += l; + } else { + iocb[actual].aio_reqprio = 0; + iocb[actual].aio_lio_opcode = LIO_READ; + iocb[actual].aio_fildes = file; + iocb[actual].aio_nbytes = l; + iocb[actual].aio_buf = d; + iocb[actual].aio_offset = o; + ok = aio_read64(iocb + actual) == 0; + if (!ok) { + break; + } + ++actual; + } + length -= sparrow_cache_block_size; + o += sparrow_cache_block_size; + } + port_event_t* events = ctx.getEvents(); + if (ok) { + uint_t n = actual; + ok = port_getn(ctx.getPort(), events, actual, &n, 0) == 0; + } + if (ok) { + for (int i = 0; i < actual; ++i) { + iocb = (aiocb64_t*)events[i].portev_object; + const int check = iocb->aio_resultp.aio_errno; + if (check == 0) { + readBytes += aio_return64(iocb); + } else { + errno = check; + ok = false; + break; + } + } + } + } else { + if (blocks <= IOV_MAX) { + struct iovec segments[IOV_MAX]; + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + struct iovec& v = segments[i]; + uint8_t* d = data[i]; + v.iov_base = (char*)(d == 0 ? trashBuffer_ : d); + v.iov_len = std::min(length, sparrow_cache_block_size); + length -= sparrow_cache_block_size; + } + + // Solaris does not support preadv (see http://bugs.opensolaris.org/bugdatabase/view_bug.do?bug_id=1167819), + // so we have to lock the file to avoid concurrent seek and read operations. + Guard guard(*lock); + ok = lseek(file, offset, SEEK_SET) == static_cast(offset); + if (ok) { + readBytes = ::readv(file, segments, blocks); + } + } else { + // Cannot use vectored I/O because there are too many blocks: read into a temporary + // buffer, and copy blocks. + uint8_t* buffer = ByteBuffer::mmap(size); + readBytes = ::pread64(file, buffer, size, offset); + if (readBytes > 0) { + uint32_t length = size; + for (uint32_t i = 0; i < blocks; ++i) { + uint8_t* d = data[i]; + if (d != 0) { + const uint32_t blockSize = std::min(length, sparrow_cache_block_size); + memcpy(d, buffer + i * sparrow_cache_block_size, blockSize); + } + length -= sparrow_cache_block_size; + } + } + ByteBuffer::munmap(buffer, size); + } + ok = readBytes != static_cast(-1); + } +#else +#error Platform not supported +#endif + if (!ok) { + throw SparrowException::create(true, "Cannot read file %s at offset %llu", name, static_cast(offset)); + } + return static_cast(readBytes); +} + +// Write a data block to a file. 
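+// The block is zero-padded up to the next sector boundary before writing. A short write
+// or an out-of-space condition wakes up the purge task and the write is retried a few
+// times (with a one-second pause) before an exception is thrown.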
+// STATIC +uint32_t IO::write(const int file, const char* name, const uint64_t offset, uint8_t* data, const uint32_t size) _THROW_(SparrowException) { + SPARROW_ENTER("IO::write"); + + // On Windows, we can only write a number of bytes multiple of the sector size, + // so adjust size to sector size and zero extra bytes. + // Note 1: this happens only when writing the last block of the file. + // Note 2: the size is adjusted even on Unix, where this is maybe not necessary, + // to keep the files platform-independent. + const uint32_t adjustedSize = static_cast(FileUtil::adjustSizeToSectorSize(size)); + memset(data + size, 0, adjustedSize - size); + int retries = 0; + while (true) { + bool ok = true; + bool retry = false; +#ifdef _WIN32 + OVERLAPPED* overlapped = IOContext::getOverlapped(); + overlapped->Offset = (DWORD)offset; + overlapped->OffsetHigh = (DWORD)(offset >> 32); + HANDLE handle = my_get_osfhandle(file); + DWORD writtenBytes = 0; + if (WriteFile(handle, data, adjustedSize, &writtenBytes, overlapped) == 0) { + if (GetLastError() != ERROR_IO_PENDING + || GetOverlappedResult(handle, overlapped, &writtenBytes, true) == 0) { + ok = false; + retry = (GetLastError() == ERROR_DISK_FULL || (writtenBytes < adjustedSize && writtenBytes > 0)); + } + } +#elif defined(__MACH__) + ssize_t writtenBytes = ::pwrite(file, data, adjustedSize, offset); + ok = writtenBytes != static_cast(adjustedSize); + retry = errno == ENOSPC || (writtenBytes < adjustedSize && writtenBytes > 0); +#elif defined(__linux__) + ssize_t writtenBytes = 0; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(1); + struct iocb** iocb = ctx.getIocb(); + io_prep_pwrite(iocb[0], file, data, adjustedSize, offset); + int result = io_submit(ctx.get(), 1, iocb); + if (result == 1) { + ok = true; + } else { + ok = false; + errno = -result; + } + while (ok) { + struct io_event* events = ctx.getEvents(); + result = io_getevents(ctx.get(), 1, 1, events, 0); + if (result == 1) { + int result = (int)events->res; + if (result < 0) { + errno = -result; + ok = false; + retry = errno == ENOSPC; + } else if (events->res2 == 0) { + writtenBytes = events->res; + if (writtenBytes < adjustedSize && writtenBytes > 0) { + retry = true; + } + break; + } else { + errno = -events->res2; + ok = false; + retry = errno == ENOSPC; + } + break; + } else if (result != -EAGAIN && result != -EINTR) { + errno = -result; + ok = false; + } + } + } else { + writtenBytes = ::pwrite(file, data, adjustedSize, offset); + ok = writtenBytes == static_cast(adjustedSize); + retry = errno == ENOSPC || (writtenBytes < adjustedSize && writtenBytes > 0); + } +#elif defined(__SunOS) + ssize_t writtenBytes = 0; + if (sparrow_async_io) { + IOContext& ctx = IOContext::get(1); + aiocb64_t* iocb = ctx.getIocb(); + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = LIO_WRITE; + iocb->aio_fildes = file; + iocb->aio_nbytes = adjustedSize; + iocb->aio_buf = data; + iocb->aio_offset = offset; + ok = aio_write64(iocb) == 0; + port_event_t* events = ctx.getEvents(); + if (ok) { + ok = port_get(ctx.getPort(), events, 0) == 0; + } + if (ok) { + iocb = (aiocb64_t*)events->portev_object; + const int check = iocb->aio_resultp.aio_errno; + if (check == 0) { + writtenBytes = aio_return64(iocb); + if (writtenBytes < adjustedSize && writtenBytes > 0) { + retry = true; + } + } else { + errno = check; + ok = false; + retry = errno == ENOSPC; + } + } + } else { + writtenBytes = ::pwrite(file, data, adjustedSize, offset); + ok = writtenBytes == static_cast(adjustedSize); + retry = errno == 
ENOSPC || (writtenBytes < adjustedSize && writtenBytes > 0); + } +#else +#error Platform not supported +#endif + if (retry && retries++ < 10) { + if (retries == 1) { + spw_print_information("Writing %u bytes in file %s failed. Only %u bytes were written (%s). Triggering purge.", + adjustedSize, name, (uint)writtenBytes, strerror(errno)); + } + Purge::wakeUp(true); + my_sleep(1000000); + } else { + if (!ok) { + throw SparrowException::create(true, "Cannot write %u bytes to file %s at offset %llu", adjustedSize, name, static_cast(offset)); + } + if (writtenBytes != adjustedSize) { + throw SparrowException::create(false, "Incomplete write file %s (%ld bytes written instead of %u)", name, writtenBytes, adjustedSize); + } + break; + } + } + return adjustedSize; +} + +// STATIC +void IO::flush(const int file, const char* name) _THROW_(SparrowException) { +#ifdef _WIN32 + HANDLE handle = my_get_osfhandle(file); + const bool ok = FlushFileBuffers(handle) != 0; +#else + const bool ok = fsync(file) == 0; +#endif + if (!ok) { + throw SparrowException::create(true, "Cannot flush file %s", name); + } +} + +} + diff --git a/storage/sparrow/engine/io.h b/storage/sparrow/engine/io.h new file mode 100644 index 000000000000..d5bb608f4e1d --- /dev/null +++ b/storage/sparrow/engine/io.h @@ -0,0 +1,326 @@ +/* + IO helpers. +*/ + +#ifndef _engine_io_h_ +#define _engine_io_h_ + +#include "exception.h" +#include "types.h" + +#if defined(__linux__) +#include +#include +#elif defined(__SunOS) +#include +#include +#elif defined(__MACH__) +#include +#include +#include +#endif + +// We probably don't need these additionnal thread local services, but gthey could be usefull. +// #include "my_thread_local.h" + +extern mysql_mutex_t THR_LOCK_open; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IOBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IOBuffer : public ByteBuffer { +private: + + const uint32_t capacity_; + +public: + + IOBuffer(const uint32_t capacity) : ByteBuffer(ByteBuffer::mmap(capacity), capacity), capacity_(capacity) { + Atomic::inc32(&SparrowStatus::get().ioBuffers_); + Atomic::add64(&SparrowStatus::get().ioBufferSize_, static_cast(capacity_)); + } + + ~IOBuffer() { + ByteBuffer::munmap(getData(), capacity_); + Atomic::dec32(&SparrowStatus::get().ioBuffers_); + Atomic::add64(&SparrowStatus::get().ioBufferSize_, -static_cast(capacity_)); + } + + uint32_t capacity() const { + return capacity_; + } +}; + +typedef SYSarray TempBuffer; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IOContext +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IOContext { +private: + + static thread_local IOContext* threadKey_; + + int nbEvents_; + + IOBuffer* buffer_; + + TempBuffer* tempBuffer1_; + TempBuffer* tempBuffer2_; + TempBuffer* tempBuffer3_; + +#ifdef _WIN32 + OVERLAPPED overlapped_; +#elif defined(__linux__) + io_context_t context_; + SYSpVector iocb_; + struct io_event* events_; +#elif defined(__SunOS) + int port_; + port_notify_t portNotify_; + aiocb64_t* iocb_; + port_event_t* events_; +#endif + +private: + + static IOContext& getContext(); + + void destroyBuffers() { + if (buffer_ != 0) { delete buffer_; buffer_ = 0; } + if (tempBuffer1_ != 0) { delete tempBuffer1_; tempBuffer1_ = 0; } + if (tempBuffer2_ != 0) { delete tempBuffer2_; 
tempBuffer2_ = 0; } + if (tempBuffer3_ != 0) { delete tempBuffer3_; tempBuffer3_ = 0; } + } + +public: + + static void initialize(); + + int getNbEvents() const { + return nbEvents_; + } + + static IOContext& get(const int nbEvents) _THROW_(SparrowException); + + static void destroy(); + +#ifdef _WIN32 + + IOContext(const int nbEvents) _THROW_(SparrowException) : nbEvents_(nbEvents), buffer_(0), tempBuffer1_(0), tempBuffer2_(0), tempBuffer3_(0) { + memset(&overlapped_, 0, sizeof(overlapped_)); + HANDLE handle = CreateEvent(0, false, false, 0); + if (handle == 0) { + throw SparrowException::create(true, "Cannot create event for OVERLAPPED structure"); + } + overlapped_.hEvent = handle; + } + + ~IOContext() { + CloseHandle(overlapped_.hEvent); + destroyBuffers(); + } + + void initEvents([[maybe_unused]] const int nbEvents){ + } + + void destroyEvents() {} + + static OVERLAPPED* getOverlapped() _THROW_(SparrowException) { + return &getContext().overlapped_; + } + +#elif defined(__MACH__) + + IOContext(const int nbEvents) _THROW_(SparrowException) : nbEvents_(nbEvents), buffer_(0), tempBuffer1_(0), tempBuffer2_(0), tempBuffer3_(0) { + } + + ~IOContext() { + destroyBuffers(); + } + +#elif defined(__linux__) + + IOContext(const int nbEvents) _THROW_(SparrowException) : nbEvents_(0), buffer_(0), tempBuffer1_(0), tempBuffer2_(0), tempBuffer3_(0), events_(0) { + initEvents(nbEvents); + } + + ~IOContext() { + destroyEvents(); + destroyBuffers(); + } + + void initEvents(const int nbEvents) + { + destroyEvents(); + + int attempts = 0; + while (nbEvents > 0) { + memset(&context_, 0, sizeof(context_)); + const int test = io_setup(nbEvents, &context_); + if (test == 0) { + break; + } else if (++attempts == 10 || test != -EAGAIN) { + errno = -test; + throw SparrowException::create(true, "Cannot initialize IO context for %u events", nbEvents); + } + } + iocb_.resize(nbEvents); + for (int i = 0; i < nbEvents; ++i) { + iocb_.append(new struct iocb); + } + events_ = new struct io_event[nbEvents]; + + nbEvents_ = nbEvents; + } + + void destroyEvents() { + if (nbEvents_ > 0) { + io_destroy(context_); + nbEvents_ = 0; + } + iocb_.clearAndDestroy(); + if ( events_ != NULL ) { + delete [] events_; + events_ = NULL; + } + } + + io_context_t get() { + return context_; + } + + struct iocb** getIocb() { + return const_cast(iocb_.data()); + } + + struct io_event* getEvents() { + memset(events_, 0, sizeof(events_[0]) * nbEvents_); + return events_; + } + +#elif defined(__SunOS) + + IOContext(const int nbEvents) _THROW_(SparrowException) : nbEvents_(nbEvents), buffer_(0), tempBuffer1_(0), tempBuffer2_(0), tempBuffer3_(0), + iocb_(0), events_(0) { + port_ = port_create(); + if (port_ < 0) { + throw SparrowException::create(true, "Cannot create completion port"); + } + memset(&portNotify_, 0, sizeof(portNotify_)); + portNotify_.portnfy_port = port_; + iocb_ = new aiocb64_t[nbEvents_]; + events_ = new port_event_t[nbEvents_]; + } + + ~IOContext() { + close(port_); + if ( iocb_ != NULL ) + delete [] iocb_; + if ( events_ != NULL ) + delete [] events_; + destroyBuffers(); + } + + int getPort() const { + return port_; + } + + aiocb64_t* getIocb() { + for (int i = 0; i < nbEvents_; ++i) { + iocb_[i].aio_sigevent.sigev_notify = SIGEV_PORT; + iocb_[i].aio_sigevent.sigev_value.sival_ptr = &portNotify_; + } + return iocb_; + } + + port_event_t* getEvents() { + return events_; + } + +#endif + + static ByteBuffer& getBuffer(const uint32_t size) { + IOContext& ctx = getContext(); + if (ctx.buffer_ == 0 || ctx.buffer_->capacity() < 
size) { + if (ctx.buffer_ != 0) delete ctx.buffer_; + ctx.buffer_ = new IOBuffer(size); + } + ctx.buffer_->position(0); + ctx.buffer_->limit(size); + return *ctx.buffer_; + } + + static void* getTempBuffer1(const uint64_t size) { + IOContext& ctx = getContext(); + if (ctx.tempBuffer1_ == 0 || ctx.tempBuffer1_->length() < size) { + if (ctx.tempBuffer1_ != 0) delete ctx.tempBuffer1_; + ctx.tempBuffer1_ = new TempBuffer(static_cast(size)); + } + return (void*)ctx.tempBuffer1_->data(); + } + + static void* getTempBuffer2(const uint64_t size) { + IOContext& ctx = getContext(); + if (ctx.tempBuffer2_ == 0 || ctx.tempBuffer2_->length() < size) { + if (ctx.tempBuffer2_ != 0) delete ctx.tempBuffer2_; + ctx.tempBuffer2_ = new TempBuffer(static_cast(size)); + } + return (void*)ctx.tempBuffer2_->data(); + } + + static void* getTempBuffer3(const uint64_t size) { + IOContext& ctx = getContext(); + if (ctx.tempBuffer3_ == 0 || ctx.tempBuffer3_->length() < size) { + if (ctx.tempBuffer3_ != 0) delete ctx.tempBuffer3_; + ctx.tempBuffer3_ = new TempBuffer(static_cast(size)); + } + return (void*)ctx.tempBuffer3_->data(); + } + +private: + + IOContext(const IOContext& right); + IOContext& operator = (const IOContext& right); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IO +////////////////////////////////////////////////////////////////////////////////////////////////////// + +enum FileMode { + FILE_MODE_CREATE = 0, + FILE_MODE_READ = 1, + FILE_MODE_UPDATE = 2 +}; + +class IO { +private: + + static uint8_t* trashBuffer_; + +public: + + static void initialize() _THROW_(SparrowException); + + static int open(const char* name, const FileMode mode) _THROW_(SparrowException); + + static void close(File file); + + static uint32_t read(const int file, const char* name, const uint64_t offset, uint8_t* data, const uint32_t size) _THROW_(SparrowException); + + static uint32_t readMultiple(const int file, Lock* lock, const char* name, const uint64_t offset, uint8_t** data, const uint32_t size) _THROW_(SparrowException); + + static uint32_t write(const int file, const char* name, const uint64_t offset, uint8_t* data, const uint32_t size) _THROW_(SparrowException); + + static void flush(const int file, const char* name) _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _engine_io_h_ */ + diff --git a/storage/sparrow/engine/list.h b/storage/sparrow/engine/list.h new file mode 100644 index 000000000000..3c97a82c88b9 --- /dev/null +++ b/storage/sparrow/engine/list.h @@ -0,0 +1,1003 @@ +/* + Non-intrusive single-linked list and intrusive double-linked lists. 
+ */ + +#ifndef _engine_list_h_ +#define _engine_list_h_ + +#include "my_base.h" +#include +#include "log.h" + +namespace Sparrow { + +// constant for "not found" +#ifndef SYS_NPOS +#define SYS_NPOS (~(static_cast(0))) +#endif + +template class SYSslink { +public: + + SYSslink(const T& object, SYSslink* next); + SYSslink* getNext() const; + void setNext(SYSslink* next); + const T& getObject() const; + T& getObject(); + void setObject(const T& object); + +protected: + + T object_; + SYSslink* next_; +}; + +template inline SYSslink::SYSslink(const T& object, SYSslink* next) : object_(object), next_(next) { +} + +template inline SYSslink* SYSslink::getNext() const { + return next_; +} + +template inline void SYSslink::setNext(SYSslink* next) { + next_ = next; +} + +template inline const T& SYSslink::getObject() const { + return object_; +} + +template inline T& SYSslink::getObject() { + return object_; +} + +template inline void SYSslink::setObject(const T& object) { + object_ = object; +} + +// +// Default allocator for single-linked lists. +// +template class SYSslAllocator { +public: + + SYSslAllocator() { + } + SYSslink* acquire(const T& object, SYSslink* next) { + return new SYSslink(object, next); + } + void release(SYSslink* link) { + delete link; + } +}; + +// +// Pool allocator for single-linked lists. +// +template class SYSslPoolAllocator { +private: + + SYSslink* root_; + +public: + + SYSslPoolAllocator() : + root_(0) { + } + ~SYSslPoolAllocator() { + SYSslink* link = root_; + while (link != 0) { + SYSslink* next = link->getNext(); + delete link; + link = next; + } + } + SYSslink* acquire(const T& object, SYSslink* next) { + if (root_ == 0) { + SYSslink* l = new SYSslink(object, next); + if (l == 0) { + spw_print_error("SYShPoolAllocator::acquire: cannot allocate %llu bytes of memory", static_cast(sizeof(*l))); + } + return l; + } else { + SYSslink* link = root_; + root_ = root_->getNext(); + link->setObject(object); + link->setNext(next); + return link; + } + } + void release(SYSslink* link) { + link->setNext(root_); + root_ = link; + } +}; + +template class SYSslistIterator; + +template > class SYSslist: public A { + friend class SYSslistIterator ; + +public: + + // constructors + SYSslist(); + SYSslist(const SYSslist& right); + + // destructor + ~SYSslist(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T& first() const; + const T& last() const; + T& first(); + T& last(); + + // operations + void insert(const T& t); + void append(const T& t); + void prepend(const T& t); + void insertAt(const uint32_t index, const T& t); + bool remove(const T& t); + T removeAt(uint32_t index); + void removeLast(); + void removeFirst(); + uint32_t index(const T& t) const; + bool contains(const T& t) const; + bool find(const T& t, T& result) const; + void clear(); + void getAll(SYSslist& list) { + list.first_ = first_; + list.last_ = last_; + list.n_ = n_; + first_ = 0; + last_ = 0; + n_ = 0; + } + void appendAll(SYSslist& list) { + if (!list.isEmpty()) { + if (isEmpty()) { + first_ = list.first_; + last_ = list.last_; + } else { + last_->setNext(list.first_); + last_ = list.last_; + } + n_ += list.n_; + list.first_ = 0; + list.last_ = 0; + list.n_ = 0; + } + } + + // copy + SYSslist& operator =(const SYSslist& right); + +protected: + + SYSslink* atPosition(uint32_t index) const; + +private: + + // equality not implemented + bool operator ==(const SYSslist& right) const; + 
+protected: + + SYSslink* first_; + SYSslink* last_; + uint32_t n_; +}; + +template inline uint32_t SYSslist::entries() const { + return n_; +} + +template inline bool SYSslist::isEmpty() const { + return (n_ == 0); +} + +template inline SYSslink* SYSslist::atPosition(uint32_t index) const { + assert(index < n_); + SYSslink* sl = first_; + while (sl != 0) { + if (index-- == 0) { + return sl; + } + sl = sl->getNext(); + } + return 0; // not reached +} + +template inline void SYSslist::insert(const T& t) { + SYSslink* sl = this->acquire(t, 0); + if (n_ == 0) { + first_ = sl; + last_ = first_; + } else { + last_->setNext(sl); + last_ = sl; + } + n_++; +} + +template inline void SYSslist::append(const T& t) { + insert(t); +} + +template inline void SYSslist::insertAt(const uint32_t index, const T& t) { + assert(index <= n_); + SYSslink* nsl = this->acquire(t, 0); + SYSslink* psl = index == 0 ? 0 : atPosition(index - 1); + if (psl == 0) { + nsl->setNext(first_); + first_ = nsl; + if (n_ == 0) { + last_ = first_; + } + } else { + nsl->setNext(psl->getNext()); + psl->setNext(nsl); + if (psl == last_) { + last_ = nsl; + } + } + n_++; +} + +template inline void SYSslist::prepend(const T& t) { + insertAt(0, t); +} + +template inline T SYSslist::removeAt(uint32_t index) { + assert(index < n_); + T result{}; + SYSslink* sl = first_; + if (n_ == 1) { + result = sl->getObject(); + this->release(sl); + first_ = 0; + last_ = 0; + n_ = 0; + } else { + SYSslink* psl = 0; + while (sl != 0) { + if (index-- == 0) { + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == first_) { + first_ = sl->getNext(); + } else if (sl == last_) { + last_ = psl; + } + n_--; + result = sl->getObject(); + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSslist::removeLast() { + assert(n_ > 0); + removeAt(n_ - 1); +} + +template inline void SYSslist::removeFirst() { + assert(n_ > 0); + removeAt(0); +} + +template inline bool SYSslist::remove(const T& t) { + bool result = false; + SYSslink* sl = first_; + if (n_ == 1) { + if (sl->getObject() == t) { + this->release(sl); + first_ = 0; + last_ = 0; + n_ = 0; + result = true; + } + } else if (n_ > 1) { + SYSslink* psl = 0; + while (sl != 0) { + if (sl->getObject() == t) { + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == first_) { + first_ = sl->getNext(); + } else if (sl == last_) { + last_ = psl; + } + n_--; + result = true; + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSslist::clear() { + SYSslink* sl = first_; + SYSslink* psl = 0; + while (sl != 0) { + psl = sl; + sl = sl->getNext(); + this->release(psl); + } + first_ = 0; + last_ = 0; + n_ = 0; +} + +template inline SYSslist::~SYSslist() { + clear(); +} + +template inline uint32_t SYSslist::index(const T& t) const { + uint32_t result = 0; + SYSslink* sl = first_; + while (sl != 0) { + if (sl->getObject() == t) { + return result; + } + sl = sl->getNext(); + result++; + } + return SYS_NPOS; +} + +template inline bool SYSslist::find(const T& t, T& result) const { + SYSslink* sl = first_; + while (sl != 0) { + if (sl->getObject() == t) { + result = sl->getObject(); + return true; + } + sl = sl->getNext(); + } + return false; +} + +template inline bool SYSslist::contains(const T& t) const { + return (index(t) != SYS_NPOS); +} + +template inline SYSslist::SYSslist() : first_(0), last_(0), n_(0) { +} + +template inline const T& 
SYSslist::operator [](const uint32_t index) const { + const SYSslink* sl = atPosition(index); + return sl->getObject(); +} + +template inline T& SYSslist::operator [](const uint32_t index) { + SYSslink* sl = atPosition(index); + return sl->getObject(); +} + +template inline const T& SYSslist::first() const { + assert(n_ > 0); + return first_->getObject(); +} + +template inline const T& SYSslist::last() const { + assert(n_ > 0); + return last_->getObject(); +} + +template inline T& SYSslist::first() { + assert(n_ > 0); + return first_->getObject(); +} + +template inline T& SYSslist::last() { + assert(n_ > 0); + return last_->getObject(); +} + +template > class SYSslistIterator : public A { +public: + + // constructor + SYSslistIterator(SYSslist& list); + + // operators + bool operator ++(); + bool operator ()(); + + // operations + void reset(); + const T& key() const; + T& key(); + bool remove(); + +private: + + // copy, assignment and equality are forbidden + SYSslistIterator(const SYSslistIterator& right); + SYSslistIterator& operator =(const SYSslistIterator& right); + bool operator ==(const SYSslistIterator& right) const; + +protected: + + SYSslist& list_; + SYSslink* psl_; + SYSslink* sl_; +}; + +template inline void SYSslistIterator::reset() { + psl_ = 0; + sl_ = 0; +} + +template inline SYSslistIterator::SYSslistIterator(SYSslist& list) : + list_(list) { + reset(); +} + +template inline bool SYSslistIterator::operator ++() { + // first time? + if (psl_ == 0 && sl_ == 0) { + sl_ = list_.first_; + } else if (sl_ != 0) { + psl_ = sl_; + sl_ = sl_->getNext(); + } + return (sl_ != 0); +} + +template inline bool SYSslistIterator::operator ()() { + return ++(*this); +} + +template inline const T& SYSslistIterator::key() const { + return sl_->getObject(); +} + +template inline T& SYSslistIterator::key() { + return sl_->getObject(); +} + +template bool SYSslistIterator::remove() { + if (sl_ != 0) { + if (list_.entries() == 1) { + list_.clear(); + psl_ = 0; + sl_ = 0; + } else if (sl_ == list_.first_) { + list_.removeAt(0); + psl_ = 0; + sl_ = list_.first_; + } else if (sl_ == list_.last_) { + sl_ = 0; + list_.removeLast(); + } else { + // remove current + psl_->setNext(sl_->getNext()); + this->release(sl_); + sl_ = psl_->getNext(); + list_.n_--; + } + return true; + } else { + return false; + } +} + +// copy operator/constructor for SYSslist: need iterator +template SYSslist& SYSslist::operator =(const SYSslist& right) { + clear(); + SYSslistIterator iterator((SYSslist&) right); + while (iterator()) { + insert(iterator.key()); + } + return *this; +} + +template SYSslist::SYSslist(const SYSslist& right) : first_(0), last_(0), n_(0) { + *this = right; +} + +template class SYSpSlistIterator; + +template > class SYSpSlist: public SYSslist { + friend class SYSpSlistIterator ; + +public: + + // constructors + SYSpSlist(); + + // accessors + T* first() const; + T* last() const; + + // operations + T* remove(const T* t); + uint32_t index(const T* t) const; + bool contains(const T* t) const; + T* find(const T* t) const; + void clearAndDestroy(); + +private: + + // equality not implemented + bool operator ==(const SYSpSlist& right) const; +}; + +template inline SYSpSlist::SYSpSlist() : SYSslist() { +} + +template inline T* SYSpSlist::first() const { + return (this->isEmpty() ? 0 : this->first_->getObject()); +} + +template inline T* SYSpSlist::last() const { + return (this->isEmpty() ? 
0 : this->last_->getObject()); +} + +template inline T* SYSpSlist::remove(const T* t) { + T* result = 0; + SYSslink* sl = this->first_; + if (this->n_ == 1) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + this->release(sl); + this->first_ = 0; + this->last_ = 0; + this->n_ = 0; + } + } else if (this->n_ > 1) { + SYSslink* psl = 0; + while (sl != 0) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + if (psl != 0) { + psl->setNext(sl->getNext()); + } + if (sl == this->first_) { + this->first_ = sl->getNext(); + } else if (sl == this->last_) { + this->last_ = psl; + } + this->n_--; + this->release(sl); + break; + } else { + psl = sl; + sl = sl->getNext(); + } + } + } + return result; +} + +template inline void SYSpSlist::clearAndDestroy() { + SYSslink* sl = this->first_; + SYSslink* psl = 0; + while (sl != 0) { + psl = sl; + sl = sl->getNext(); + T* object = psl->getObject(); + delete object; + this->release(psl); + } + this->first_ = 0; + this->last_ = 0; + this->n_ = 0; +} + +template inline uint32_t SYSpSlist::index(const T* t) const { + uint32_t result = 0; + SYSslink* sl = this->first_; + while (sl != 0) { + if (*sl->getObject() == *t) { + return result; + } + sl = sl->getNext(); + result++; + } + return SYS_NPOS; +} + +template inline T* SYSpSlist::find(const T* t) const { + T* result = 0; + SYSslink* sl = this->first_; + while (sl != 0) { + if (*sl->getObject() == *t) { + result = sl->getObject(); + break; + } + sl = sl->getNext(); + } + return result; +} + +template inline bool SYSpSlist::contains(const T* t) const { + return (this->index(t) != SYS_NPOS); +} + +template > class SYSpSlistIterator { +public: + + // constructor + SYSpSlistIterator(SYSpSlist& list); + + // operators + bool operator ++(); + T* operator ()(); + + // operations + void reset(); + const T* key() const; + T* key(); + bool remove(); + +private: + + // copy, assignment and equality are forbidden + SYSpSlistIterator(const SYSpSlistIterator& right); + SYSpSlistIterator& operator =(const SYSpSlistIterator& right); + bool operator ==(const SYSpSlistIterator& right) const; + +protected: + + SYSpSlist& list_; + SYSslink* psl_; + SYSslink* sl_; +}; + +template inline void SYSpSlistIterator::reset() { + psl_ = 0; + sl_ = 0; +} + +template inline SYSpSlistIterator::SYSpSlistIterator(SYSpSlist& list) : + list_(list) { + reset(); +} + +template inline bool SYSpSlistIterator::operator ++() { + // first time? 
+ if (psl_ == 0 && sl_ == 0) { + sl_ = list_.first_; + } else if (sl_ != 0) { + psl_ = sl_; + sl_ = sl_->getNext(); + } + return (sl_ != 0); +} + +template inline T* SYSpSlistIterator::operator ()() { + if (++(*this)) { + return sl_->getObject(); + } + return 0; +} + +template inline const T* SYSpSlistIterator::key() const { + return sl_->getObject(); +} + +template inline T* SYSpSlistIterator::key() { + return sl_->getObject(); +} + +template inline bool SYSpSlistIterator::remove() { + if (sl_ != 0) { + if (list_.entries() == 1) { + list_.clear(); + psl_ = 0; + sl_ = 0; + } else if (sl_ == list_.first_) { + list_.removeAt(0); + psl_ = 0; + sl_ = list_.first_; + } else if (sl_ == list_.last_) { + sl_ = 0; + list_.removeLast(); + } else { + // remove current + psl_->setNext(sl_->getNext()); + this->release(sl_); + sl_ = psl_->getNext(); + list_.n_--; + } + return true; + } else { + return false; + } +} + +template class SYSidlink { +public: + + SYSidlink(); + +public: + + T* prev_; + T* next_; +}; + +template inline SYSidlink::SYSidlink() : + prev_(0), next_(0) { +} + +template class SYSidlistIterator; + +template class SYSidlist { + friend class SYSidlistIterator ; + +public: + + // constructor + SYSidlist(); + + // destructor + ~SYSidlist(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + T* last() const; + T* first() const; + + // operations + T* remove(T* t); + T* removeFirst(); + void prepend(T* t); + void append(T* t); + void clear() { + first_ = 0; + last_ = 0; + n_ = 0; + } + void append(SYSidlist& list) { + n_ += list.n_; + if (last_ == 0) { + first_ = list.first_; + last_ = list.last_; + } else { + last_->next_ = list.first_; + if (list.first_ != 0) { + list.first_->prev_ = last_; + } + if (list.last_ != 0) { + last_ = list.last_; + } + } + } + bool contains(T* t) const { + T* it = first_; + while (it != 0) { + if (it == t) { + return true; + } + it = it->next_; + } + return false; + } + + bool contains(const T& t) const { + T* it = first_; + while (it != 0) { + if (*it == t) { + return true; + } + it = it->next_; + } + return false; + } + +protected: + + // internal link/unlink operations + void unlink(T* t); + void link(T* t, T* right); + +private: + + // copy, assignment and equality are forbidden + SYSidlist(const SYSidlist& right); + SYSidlist& operator =(const SYSidlist& right); + bool operator ==(const SYSidlist& right) const; + +protected: + + T* first_; + T* last_; + uint32_t n_; +}; + +// remove t from list +template inline void SYSidlist::unlink(T* t) { + assert(n_ > 0); + if (t->prev_ != 0) { + assert(t->prev_->next_ == t); + t->prev_->next_ = t->next_; + } else { + assert(first_ == t); + first_ = t->next_; + } + if (t->next_ != 0) { + assert(t->next_->prev_ == t); + t->next_->prev_ = t->prev_; + } else { + assert(last_ == t); + last_ = t->prev_; + } + t->prev_ = 0; + t->next_ = 0; + n_--; +} + +// link right to the right of t (list must not be empty) +template inline void SYSidlist::link(T* t, T* right) { + assert(first_ != 0 && last_ != 0 && t != 0 && right != 0); + right->prev_ = t; + right->next_ = t->next_; + t->next_ = right; + right->next_->prev_ = right; + if (last_ == t) { + last_ = right; + } + n_++; +} + +template inline uint32_t SYSidlist::entries() const { + return n_; +} + +template inline bool SYSidlist::isEmpty() const { + return (n_ == 0); +} + +template inline T* SYSidlist::first() const { + return first_; +} + +template inline T* SYSidlist::last() const { + return last_; +} + +template inline void SYSidlist::append(T* t) { + if 
(last_ == 0) { + assert(first_ == 0); + last_ = t; + first_ = t; + t->prev_ = 0; + t->next_ = 0; + } else { + assert(first_ != 0 && last_->next_ == 0); + last_->next_ = t; + t->prev_ = last_; + t->next_ = 0; + last_ = t; + } + n_++; +} + +template inline void SYSidlist::prepend(T* t) { + if (last_ == 0) { + assert(first_ == 0); + last_ = t; + first_ = t; + t->prev_ = 0; + t->next_ = 0; + } else { + first_->prev_ = t; + t->prev_ = 0; + t->next_ = first_; + first_ = t; + } + n_++; +} + +template inline T* SYSidlist::removeFirst() { + if (n_ == 0) { + return 0; + } else { + T* t = first_; + unlink(first_); + return t; + } +} + +// remove an element from the list (undefined results if the element is not in the list) +template inline T* SYSidlist::remove(T* t) { + unlink(t); + return t; +} + +template inline SYSidlist::~SYSidlist() { +} + +template inline SYSidlist::SYSidlist() : + first_(0), last_(0), n_(0) { +} + +template class SYSidlistIterator { +public: + + // constructor + SYSidlistIterator(SYSidlist& list); + + // operators + T* operator ++(); + T* operator --(); + T* operator ()(); + + // operations + void reset(); + T* key() const; + void insert(T* t); + +private: + + // copy, assignment and equality are forbidden + SYSidlistIterator(const SYSidlistIterator& right); + SYSidlistIterator& operator =(const SYSidlistIterator& right); + bool operator ==(const SYSidlistIterator& right) const; + +protected: + + SYSidlist& list_; + T* l_; +}; + +template inline void SYSidlistIterator::reset() { + l_ = 0; +} + +template inline SYSidlistIterator::SYSidlistIterator(SYSidlist& list) : + list_(list) { + reset(); +} + +template inline T* SYSidlistIterator::operator ++() { + l_ = (l_ == 0 ? list_.first_ : l_->next_); + return l_; +} + +template inline T* SYSidlistIterator::operator --() { + if (l_ != 0) { + l_ = l_->prev_; + } + return l_; +} + +template inline void SYSidlistIterator::insert(T* t) { + assert(l_ != 0); + list_.link(l_, t); +} + +template inline T* SYSidlistIterator::operator ()() { + return ++(*this); +} + +template inline T* SYSidlistIterator::key() const { + return l_; +} + +} + +#endif /* #ifndef _engine_list_h_ */ diff --git a/storage/sparrow/engine/listener.cc b/storage/sparrow/engine/listener.cc new file mode 100644 index 000000000000..24f0fda10e20 --- /dev/null +++ b/storage/sparrow/engine/listener.cc @@ -0,0 +1,718 @@ +/* + Listener thread. +*/ + +#include "listener.h" +#include "serial.h" +#include "types.h" +#include "internalapi.h" +#include "socketutil.h" +#include "compress.h" +#include "binbuffer.h" +#include "coalescing.h" +#include "purge.h" +#include "../dns/dnsdefault.h" +#include "../handler/plugin.h" // For configuration parameters. + +#include +//#include + +#include "../engine/log.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Listener +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Listener* Listener::listener_ = 0; + +template class Sort; + +// Initializes the listener thread. +// STATIC +void Listener::initialize() _THROW_(SparrowException) { + SPARROW_ENTER("Listener::initialize"); + + // Create socket. + SocketAddress socketAddress = SocketUtil::getAddress(sparrow_listener_address, sparrow_listener_port); + my_socket socketId = SocketUtil::create(SOCK_STREAM, socketAddress); + + // Listen socket. + if (listen(socketId, 5)) { // 5 connections max in the queue. 
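+ // listen() failed: release the socket before reporting the error.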
+ closesocket(socketId); + throw SparrowException::create(true, "Cannot listen socket"); + } + + // Create and start listener thread. + listener_ = new Listener(socketId); + if (!listener_->start()) { + throw SparrowException::create(false, "Cannot start listener thread"); + } + + // Log info message. + spw_print_information("Sparrow is ready and listening %s", socketAddress.print().c_str()); +} + +// Timeout is in milliseconds. +Listener::Listener(my_socket socketId) : Thread("Listener::listener_"), socket_(socketId) { + SPARROW_ENTER("Listener::Listener"); + FD_ZERO(&fdSet_); + FD_SET(socket_, &fdSet_); + my_socket stopSocket = SocketUtil::getStopSocket(); + if (stopSocket != INVALID_SOCKET) { + FD_SET(stopSocket, &fdSet_); + } +} + +Listener::~Listener() { + SPARROW_ENTER("Listener::~Listener"); + ::shutdown(socket_, SHUT_RDWR); + closesocket(socket_); + while (!handlers_.isEmpty()) { + ConnectionHandler* handler = handlers_.first(); + handler->stop(2000); + delete handler; + } +} + +bool Listener::process() { + SPARROW_ENTER("Listener::process"); + fd_set fdSet = fdSet_; + my_socket stopSocket = SocketUtil::getStopSocket(); + my_socket maxSocket = stopSocket == INVALID_SOCKET ? socket_ : std::max(socket_, stopSocket); + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + int rc = select(static_cast(maxSocket + 1), &fdSet, 0, 0, &tv); + if (rc > 0 && FD_ISSET(socket_, &fdSet)) { + // Accept incoming connection. + my_socket newSocketId = accept(socket_, 0, 0); + if (newSocketId >= 0) { + ConnectionHandler* handler = new ConnectionHandler(newSocketId); + if (addHandler(handler)) { + DBUG_PRINT("sparrow_api", ("Accepting incoming connection on TCP socket %u", static_cast(newSocketId))); + // Spawn a connection handler thread. + if (!handler->start()) { + spw_print_error("Sparrow: Cannot start connection handler"); + delete handler; + } + } else { + delete handler; + spw_print_error("sparrow_api: Connection refused; maximum number of connections (%u) reached", sparrow_max_connections); + } + } else { + try { + throw SparrowException::create(true, "Cannot accept new connection"); + } catch(const SparrowException& e) { + e.toLog(); + } + } + } + return true; +} + +bool Listener::notifyStop() { + return SocketUtil::notifyStopSocket(); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ConnectionHandler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +ConnectionHandler::ConnectionHandler(my_socket socketId) : Thread(ConnectionHandler::getName().c_str()), connection_(new Connection(socketId)) { + SPARROW_ENTER("ConnectionHandler::ConnectionHandler"); + FD_ZERO(&fdSet_); + FD_SET(connection_->getSocket(), &fdSet_); + my_socket stopSocket = SocketUtil::getStopSocket(); + if (stopSocket != INVALID_SOCKET) { + FD_SET(stopSocket, &fdSet_); + } +} + +volatile uint32_t ConnectionHandler::counter_ = 0; + +// STATIC +Str ConnectionHandler::getName() { + char tmp[128]; + snprintf(tmp, sizeof(tmp), "ConnectionHandler(%u)", Atomic::inc32(&counter_)); + return Str(tmp); +} + +ConnectionHandler::~ConnectionHandler() { + SPARROW_ENTER("ConnectionHandler::~ConnectionHandler"); + Listener::removeHandler(this); + DBUG_PRINT("sparrow_api", ("Stopping connection handler thread")); +} + +bool ConnectionHandler::process() { + SPARROW_ENTER("ConnectionHandler::process"); + if (connection_->isClosed()) { + return false; + } + fd_set fdSet = fdSet_; + my_socket stopSocket = 
SocketUtil::getStopSocket(); + my_socket socketId = connection_->getSocket(); + my_socket maxSocket = stopSocket == INVALID_SOCKET ? socketId : std::max(socketId, stopSocket); + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + int rc = select(static_cast(maxSocket + 1), &fdSet, 0, 0, &tv); + if (rc <= 0) { + return true; + } else if (FD_ISSET(socketId, &fdSet)) { + try { + DBUG_PRINT("sparrow_api", ("Receiving request...")); + Request* request = new Request(connection_.get()); + ApiWorker::sendJob(request); + return true; + } catch(const SparrowException& e) { + DBUG_PRINT("sparrow_api", ("Request aborted")); + e.toLog(); + return false; // This will kill and destroy this thread. + } + } else { + return false; + } +} + +bool ConnectionHandler::notifyStop() { + return SocketUtil::notifyStopSocket(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Connection::Connection(my_socket socketId) : socket_(socketId), authentified_(false), closed_(false), lock_(false, Connection::getName().c_str()) { + setsockopt(socket_, SOL_SOCKET, SO_RCVBUF, (char*)&sparrow_socket_rcvbuf_size, sizeof(sparrow_socket_rcvbuf_size)); + setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, (char*)&sparrow_socket_sndbuf_size, sizeof(sparrow_socket_sndbuf_size)); + Atomic::inc32(&SparrowStatus::get().apiActiveConnections_); + Atomic::inc64(&SparrowStatus::get().apiConnections_); +} + +volatile uint32_t Connection::counter_ = 0; + +// STATIC +Str Connection::getName() { + char tmp[128]; + snprintf(tmp, sizeof(tmp), "Connection(%u)::lock_", Atomic::inc32(&counter_)); + return Str(tmp); +} + +void Connection::close() { + Guard guard(lock_); + if (!closed_) { + closed_ = true; + ::shutdown(socket_, SHUT_RDWR); + ::closesocket(socket_); + Atomic::dec32(&SparrowStatus::get().apiActiveConnections_); + DBUG_PRINT("sparrow_api", ("Closed TCP socket %u, remaining %u active connections", socket_, SparrowStatus::get().apiActiveConnections_)); + } +} + +Connection::~Connection() { + SPARROW_ENTER("Connection::~Connection"); + close(); +} + +void Connection::authenticate(const Str& username, const ByteBuffer& encryptedPassword) _THROW_(SparrowException) { + const uint32_t length = static_cast(encryptedPassword.limit()); + static const char* secretKey = "49#28!86@14\"&"; + const uint32_t aesLength = static_cast(my_aes_get_size(length, my_aes_128_ecb)); + char* decryptedPassword = static_cast(IOContext::getTempBuffer2(aesLength)); + const int result = my_aes_decrypt(reinterpret_cast(encryptedPassword.getData()), + length, reinterpret_cast(decryptedPassword), + reinterpret_cast(secretKey), static_cast(strlen(secretKey)), my_aes_128_ecb, NULL); + if (result <= 0) { + throw SparrowException("Cannot decrypt password", false); + } + const Str password(decryptedPassword, result); + try { + // Dummy query to check username/password against MySQL. + MySQLGuard guard(username.c_str(), password.c_str()); + guard.execute("select 1"); + { + Guard lockGuard(lock_); + username_ = username; + password_ = password; + authentified_ = true; + } + } catch(const SparrowException& e) { + // Replace MySQL error by a generic authentication error. 
+ char tmp[1080]; + snprintf(tmp, sizeof(tmp), "Cannot validate username and password: %s", e.getText()); + throw SparrowException(tmp, false); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Request +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const uint8_t Request::TAG[] = { 83, 80, 65, 82, 82, 79, 87 }; // "SPARROW" + +const uint8_t Request::SPARROW_API_VERSION = 1; + +Request::Request(Connection* connection) _THROW_(SparrowException) : connection_(connection) { + SPARROW_ENTER("Request::Request"); + uint32_t compressedLength; + + // Read header. + { + uint8_t header[28]; + ByteBuffer buffer(header, sizeof(header)); + SocketReader reader(*connection_, buffer); + for (uint32_t i = 0; i < sizeof(TAG); ++i) { + uint8_t check; + reader >> check; + if (check != TAG[i]) { + dump_buffer(buffer); + throw SparrowException::create(false, "Malformed API request"); + } + } + uint8_t version; + reader >> version; + if (version != SPARROW_API_VERSION) { + throw SparrowException::create(false, "Unsupported API version: %u", static_cast(version)); + } + reader >> id_; + reader >> length_ >> compressionAlgorithm_ >> compressedLength >> action_; + if (length_ > 100 * 1024 * 1024 || compressedLength > 100 * 1024 * 1024) { + const Str size(Str::fromSize(length_)); + throw SparrowException::create(false, "Received too much data (%s)", size.c_str()); + } + } + + // Read compressed request data. + { + buffer_ = RequestBuffer(compressedLength); + ByteBuffer buffer(buffer_.data(), compressedLength); + SocketReader reader(*connection_, buffer); + reader.advance(compressedLength); + } + + // Update stats. + Atomic::inc64(&SparrowStatus::get().apiRequests_); + Atomic::add64(&SparrowStatus::get().apiInputBytes_, 16 + compressedLength); + Atomic::add64(&SparrowStatus::get().apiInputUncompressedBytes_, 16 + length_); +} + +void Request::dump_buffer(const ByteBuffer& buffer) { + spw_print_information("Malformed API request. Dump of packet header:"); + uint64_t size = buffer.limit(); + if (size != 0) { + char* str = new char[size*3+1]; + const uint8_t* data = buffer.getData(); + uint j = 0; + for (uint64_t i=0; i> 4; + if (c < 10) { + str[j++] = c + '0'; + } else { + str[j++] = c - 10 + 'A'; + } + + c = *data & 0x0F; + if (c < 10) { + str[j++] = c + '0'; + } else { + str[j++] = c - 10 + 'A'; + } + str[j++] = ' '; + data++; + } + str[j] = '\0'; + spw_print_information("%s", str); + delete [] str; + } +} + +void Request::process() { + SPARROW_ENTER("Request::process"); + try { + // Decompress request. + if (buffer_.length() < length_) { + if (compressionAlgorithm_ != 1) { + throw SparrowException::create(false, "Only lzjb compression is supported"); + } + RequestBuffer buffer(length_); + const int result = LZJB::decompress(buffer_.data(), buffer.data(), buffer_.length(), length_); + if (result != 0) { + throw SparrowException::create(false, "Cannot decompress data"); + } + buffer_ = buffer; + } + + // Actually process the request and generate the response. + GrowingByteBuffer response; + doProcess(response); + + // Compress the response using the input compression algorithm and send it. + const uint32_t length = static_cast(response.position()); + if (compressionAlgorithm_ == 0) { + // No compression. 
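+ // The response frame mirrors the request header: 7-byte TAG, version byte,
+ // request id, uncompressed length, compression algorithm, payload length,
+ // and then the (here uncompressed) payload itself.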
+ SocketWriterGuard guard(*connection_); + ByteBuffer& writer = guard.get(); + for (uint32_t i = 0; i < sizeof(TAG); ++i) { + writer << TAG[i]; + } + writer << SPARROW_API_VERSION; + writer << id_ << length << compressionAlgorithm_ << length; + writer << ByteBuffer(response.getData(), length); + + // Update stats. + Atomic::inc64(&SparrowStatus::get().apiResponses_); + Atomic::add64(&SparrowStatus::get().apiOutputBytes_, length); + Atomic::add64(&SparrowStatus::get().apiOutputUncompressedBytes_, length); + } else { + RequestBuffer compressedBuffer(length); + const uint32_t compressedLength = static_cast(LZJB::compress(response.getData(), compressedBuffer.data(), length, length)); + { + SocketWriterGuard guard(*connection_); + ByteBuffer& writer = guard.get(); + for (uint32_t i = 0; i < sizeof(TAG); ++i) { + writer << TAG[i]; + } + writer << SPARROW_API_VERSION; + writer << id_ << length << compressionAlgorithm_ << compressedLength; + if (compressedLength < length) { + writer << ByteBuffer(compressedBuffer.data(), compressedLength); + } else { + writer << ByteBuffer(response.getData(), length); + } + } + + // Update stats. + Atomic::inc64(&SparrowStatus::get().apiResponses_); + Atomic::add64(&SparrowStatus::get().apiOutputBytes_, compressedLength); + Atomic::add64(&SparrowStatus::get().apiOutputUncompressedBytes_, length); + } + } catch(const SparrowException& e) { + e.toLog(); + } +} + +void Request::doProcess(ByteBuffer& response) _THROW_(SparrowException) { + SPARROW_ENTER("Request::doProcess"); + MasterGuard master; + GrowingByteBuffer resp_data; + bool ok = false; + try { + ByteBuffer buffer(buffer_.data(), buffer_.length()); + if (action_ != MSG_ID_AUTH && !connection_->isAuthentified()) { + throw SparrowException::create(false, "Connection is not authenticated"); + } + if (action_ == MSG_ID_AUTH) { + DBUG_PRINT("sparrow_api", ("MSG_ID_AUTH")); + Str username; + buffer >> username; + uint32_t length; + buffer >> length; + ByteBuffer encryptedPassword(static_cast(IOContext::getTempBuffer1(length)), length); + buffer >> encryptedPassword; + connection_->authenticate(username, encryptedPassword); + DBUG_PRINT("sparrow_api", ("Successfully authenticated connection")); + } else if (action_ == MSG_ID_INIT) { + Str database; + Str table; + buffer >> database >> table; + DBUG_PRINT("sparrow_api", ("MSG_ID_INIT, table %s.%s", database.c_str(), table.c_str())); + ColumnExs columns; + Indexes indexes; + ForeignKeys foreignKeys; + DnsConfiguration dnsConfiguration; + uint32_t aggregationPeriod; + uint64_t defaultWhere; + uint64_t stringOptimization; + uint64_t maxLifetime; + uint64_t coalescingPeriod; + buffer >> columns >> indexes >> foreignKeys >> dnsConfiguration >> aggregationPeriod >> defaultWhere + >> stringOptimization >> maxLifetime >> coalescingPeriod; + InternalApi::init(connection_->getUsername().c_str(), connection_->getPassword().c_str(), database.c_str(), table.c_str(), + columns, indexes, foreignKeys, dnsConfiguration, aggregationPeriod, defaultWhere, stringOptimization, maxLifetime, coalescingPeriod); + DBUG_PRINT("sparrow_api", ("Successfully initialized table %s.%s", database.c_str(), table.c_str())); + } else if (action_ == MSG_ID_DATA) { + Str database; + Str table; + buffer >> database >> table; + DBUG_PRINT("sparrow_api", ("MSG_ID_DATA, table %s.%s", database.c_str(), table.c_str())); + uint32_t rows; + buffer >> rows; + uint32_t length; + buffer >> length; + InternalApi::write(database.c_str(), table.c_str(), buffer, rows); + DBUG_PRINT("sparrow_api", ("Successfully 
inserted %u rows into table %s.%s", rows, database.c_str(), table.c_str())); + } else if (action_ == MSG_ID_DATA_EX) { + Str database; + Str table; + buffer >> database >> table; + uint32_t nbCols; + buffer >> nbCols; + Names colNames(nbCols); + for (uint i=0; i> colName; + colNames.append(colName); + } + uint32_t rows; + buffer >> rows; + uint32_t length; + buffer >> length; + DBUG_PRINT("sparrow_api", ("MSG_ID_DATA_EX, table %s.%s", database.c_str(), table.c_str())); + InternalApi::write(database.c_str(), table.c_str(), colNames, buffer, rows); + DBUG_PRINT("sparrow_api", ("Successfully inserted %u rows into table %s.%s on a selection of %u columns", rows, database.c_str(), table.c_str(), colNames.entries())); + } else if (action_ == MSG_ID_GET_MASTER) { + Str database; + Str table; + buffer >> database >> table; + DBUG_PRINT("sparrow_api", ("MSG_ID_GET_MASTER, table %s.%s", database.c_str(), table.c_str())); + try { + master = InternalApi::get(database.c_str(), table.c_str(), false, false, 0); + } catch(const SparrowException& e) { + // The master file may have been copied after database startup: discover master files again and retry. + InternalApi::setup(); + master = InternalApi::get(database.c_str(), table.c_str(), false, false, 0); + } + if (master != 0) { + master.get()->retrieve(resp_data); + } + DBUG_PRINT("sparrow_api", ("Successfully retrieved master file from table %s.%s", database.c_str(), table.c_str())); + } else if (action_ == MSG_ID_DISABLE_COALESCING) { + uint32_t timeout; + bool wait_until_end = false; + buffer >> timeout; + if (!buffer.end()) { + buffer >> wait_until_end; + } + DBUG_PRINT("sparrow_api", ("MSG_ID_DISABLE_COALESCING, timeout %us, wait %u", timeout, wait_until_end)); + CoalescingControlTask::disable(timeout, wait_until_end); + } else if (action_ == MSG_ID_DISABLE_COALESCING_DB) { + uint32_t timeout; + Str database; + bool wait_until_end = false; + buffer >> timeout >> database; + if (!buffer.end()) { + buffer >> wait_until_end; + } + DBUG_PRINT("sparrow_api", ("MSG_ID_DISABLE_COALESCING_DB, timeout %us, database %s, wait %u", timeout, database.c_str(), wait_until_end)); + CoalescingControlTaskPerDB::disable(timeout, database, wait_until_end); + } else if (action_ == MSG_ID_REMOVE_PARTITIONS) { + Str database; + Str table; + uint64_t low; + uint64_t up; + buffer >> database >> table >> low >> up; + const TimePeriod period(low == 0 ? 0 : &low, up == 0 ? 0 : &up, low != 0, up != 0); +#ifndef NDEBUG + const Str sPeriod = Str::fromTimePeriod(period); + DBUG_PRINT("sparrow_api", ("MSG_ID_REMOVE_PARTITIONS, table %s.%s, period %s", database.c_str(), table.c_str(), sPeriod.c_str())); +#endif + InternalApi::removePartitions(database.c_str(), table.c_str(), period); + } else if (action_ == MSG_ID_SWITCH_PURGE_MODE) { + uint32_t timeout; + Str database; + int mode; + buffer >> timeout >> database >> mode; + DBUG_PRINT("sparrow_api", ("MSG_ID_SWITCH_PURGE_MODE to mode %u, timeout %us, database %s", mode, timeout, database.c_str())); + PurgeModeControlTaskPerDB::switchMode(timeout, database, static_cast(mode)); + } else { + throw SparrowException::create(false, "Unknown action code %u", action_); + } + ok = true; + } catch(const SparrowException& e) { + e.toLog(); + response << Str(e.getText(), false) << e.get_err_code(); + } + if (ok) { + response << static_cast(0); + if (resp_data.position() != 0) + { + resp_data.limit(resp_data.position()+1); + resp_data.position(0); + response << resp_data; + } + //if (master != 0) { // For MSG_ID_GET_MASTER. 
+ // master.get()->retrieve(response); + //} + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingControlTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Lock CoalescingControlTask::lock_(true, "CoalescingControlTask::lock_"); +CoalescingControlTask* CoalescingControlTask::task_ = 0; + +void CoalescingControlTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + if (!sparrow_coalescing && initialCoalescing_) { + CoalescingWorker::initialize(); + } else if (sparrow_coalescing && !initialCoalescing_) { + CoalescingWorker::shutdown(); + } + sparrow_coalescing = initialCoalescing_; + { + Guard guard(lock_); + task_ = 0; + } +} + +// STATIC +void CoalescingControlTask::disable(const uint32_t timeout, bool wait) { + { + Guard guard(lock_); + // If the task already exist, reschedule it. If the scheduler can't find it, it means the task is being processed, + // therefore do not do anything. + if (task_ != 0) { + DBUG_PRINT("sparrow_coalescing_control_task", ("Rescheduling coalescing ctrl task %p for %us", task_, timeout)); + if (!Scheduler::moveTask(task_, Scheduler::now() + timeout * 1000)) { + DBUG_PRINT("sparrow_coalescing_control_task", ("Could not reschedule coalescing ctrl task %p.", task_)); + } + } else { + task_ = new CoalescingControlTask(); + DBUG_PRINT("sparrow_coalescing_control_task", ("Scheduling coalescing ctrl task %p for %us", task_, timeout)); + Scheduler::addTask(task_, Scheduler::now() + timeout * 1000, true); + } + } + + if (timeout != 0) + { + InternalApi::StopCoalescingTasks(); + + if (wait) { + Masters masters = InternalApi::getAll(); + const uint32_t nbMasters = masters.length(); + while (true) { + uint32_t i = 0; + for (i=0; imode_ = mode; + if (!Scheduler::moveTask(task, Scheduler::now() + timeout * 1000)) { + DBUG_PRINT("sparrow_purge_control_task", ("Could not reschedule purge ctrl task %p.", task)); + task = 0; + } + } + if ( task == 0 ) { + task = new PurgeModeControlTaskPerDB( schema, mode ); + DBUG_PRINT("sparrow_purge_control_task", ("Scheduling purge ctrl task %p for schema %s, mode %u for %us", task, schema.c_str(), mode, timeout)); + tasks_.append( task ); + Scheduler::addTask(task, Scheduler::now() + timeout * 1000, true); + } +} + +// STATIC +PurgeMode PurgeModeControlTaskPerDB::getMode(const Str& schema) { + PurgeModeControlTaskPerDB key(schema); + Guard guard(lock_); + PurgeModeControlTaskPerDB* task = tasks_.find(&key); + if (task != NULL) { + return task->mode_; + } + return (sparrow_purge_constantly ? PURGE_MODE_CONSTANTLY : PURGE_MODE_ON_INSERTION); +} + +} diff --git a/storage/sparrow/engine/listener.h b/storage/sparrow/engine/listener.h new file mode 100644 index 000000000000..1757ab1c3f63 --- /dev/null +++ b/storage/sparrow/engine/listener.h @@ -0,0 +1,348 @@ +/* + Listener thread. 
+*/ + +#ifndef _engine_listener_h_ +#define _engine_listener_h_ + +#ifdef _WIN32 +#include +#include +#else +#include +#endif +#include "scheduler.h" +#include "thread.h" +#include "exception.h" +#include "misc.h" +#include "hash.h" +#include "lock.h" +#include "purge.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Connection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketWriterGuard; +class Connection : public RefCounted { + friend class SocketWriterGuard; + +private: + + my_socket socket_; + bool authentified_; + bool closed_; + Str username_; + Str password_; + Lock lock_; // To serialize writes and to make authentication atomic. + + static volatile uint32_t counter_; + +private: + + static Str getName(); + +public: + + Connection(my_socket socketId); + + void authenticate(const Str& username, const ByteBuffer& encryptedPassword) _THROW_(SparrowException); + + my_socket getSocket() const { + return socket_; + } + + bool isAuthentified() const { + return authentified_; + } + + const Str& getUsername() const { + return username_; + } + + const Str& getPassword() const { + return password_; + } + + void close(); + + bool isClosed() const { + return closed_; + } + + ~Connection(); +}; + +typedef RefPtr ConnectionGuard; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketWriterGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketWriterGuard : private Guard { +private: + + SocketWriter writer_; + +public: + + SocketWriterGuard(Connection& connection) : Guard(connection.lock_), writer_(connection) { + } + + ByteBuffer& get() { + return writer_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Request +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSarray RequestBuffer; + +class Request : public Job { +private: + + ConnectionGuard connection_; + uint32_t id_; + uint32_t action_; + RequestBuffer buffer_; + uint32_t length_; + uint32_t compressionAlgorithm_; + + static const uint8_t TAG[]; + + static const uint8_t SPARROW_API_VERSION; + +private: + + void doProcess(ByteBuffer& response) _THROW_(SparrowException); + + void dump_buffer(const ByteBuffer&); + +public: + + Request(Connection* connection) _THROW_(SparrowException); + + void process() override; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ConnectionHandler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define MSG_ID_AUTH 0 +#define MSG_ID_INIT 1 +#define MSG_ID_DATA 2 +#define MSG_ID_GET_MASTER 3 +#define MSG_ID_DISABLE_COALESCING 4 +#define MSG_ID_REMOVE_PARTITIONS 5 +#define MSG_ID_DISABLE_COALESCING_DB 6 +#define MSG_ID_DATA_EX 7 +#define MSG_ID_SWITCH_PURGE_MODE 8 + +class ConnectionHandler : public Thread, public SYSidlink { +private: + + ConnectionGuard connection_; + fd_set fdSet_; + static volatile uint32_t counter_; + +private: + + static Str getName(); + +protected: + + bool process() override; + + bool notifyStop() override; + + bool deleteAfterExit() override { + return true; + } + +public: + + ConnectionHandler(my_socket socketId); + + ~ConnectionHandler(); +}; + 
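+// Illustrative summary of the API request framing, as parsed by Request::Request
+// (derived from the code above, not a normative spec):
+//
+//   offset  size  field
+//   0       7     TAG: the ASCII bytes "SPARROW"
+//   7       1     protocol version (must equal SPARROW_API_VERSION)
+//   8       4     request id
+//   12      4     uncompressed payload length (capped at 100 MB)
+//   16      4     compression algorithm (1 = LZJB; 0 = none)
+//   20      4     compressed payload length (capped at 100 MB)
+//   24      4     action, one of the MSG_ID_* codes above
+//
+// The 28-byte header is followed by the compressed payload. Responses use the
+// same framing without the action field (see Request::process()).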
+typedef SYSidlist ConnectionHandlers; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Listener +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Listener : public Thread { +private: + + static Listener* listener_; + + my_socket socket_; + fd_set fdSet_; + + ConnectionHandlers handlers_; + +protected: + + bool process() override; + + bool notifyStop() override; + + bool deleteAfterExit() override { + return false; + } + +public: + + static void initialize() _THROW_(SparrowException); + + static void shutdown() { + if (listener_ != 0) { + listener_->stop(); + delete listener_; + listener_ = 0; + } + } + + Listener(my_socket socketId); + + ~Listener(); + + bool addHandler(ConnectionHandler* handler) { + Guard guard(lock_); + handlers_.append(handler); + return handlers_.entries() <= sparrow_max_connections; + } + + static void removeHandler(ConnectionHandler* handler) { + Guard guard(listener_->lock_); + listener_->handlers_.remove(handler); + } +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingControlTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingControlTask : public Task { +private: + + static Lock lock_; + static CoalescingControlTask* task_; // Task that disables the coalescing globally + bool initialCoalescing_; + +public: + + CoalescingControlTask() : Task(Worker::getQueue()), initialCoalescing_(sparrow_coalescing) { + sparrow_coalescing = 0; + } + + virtual bool operator == (const CoalescingControlTask& right) const { + return true; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); + + static void disable(const uint32_t timeout, bool wait); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CoalescingControlTaskPerDB +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class CoalescingControlTaskPerDB; +typedef SYSpVector CoalescingControlTasks; + +class CoalescingControlTaskPerDB : public Task { +private: + + static Lock lock_; + static CoalescingControlTasks tasks_; + Str schema_; + +public: + + CoalescingControlTaskPerDB(const Str& schema) : Task(Worker::getQueue()), schema_(schema) { + DBUG_PRINT("sparrow_coalescing_control_task", ("New task %p for schema %s", this, schema_.c_str())); + } + + ~CoalescingControlTaskPerDB() { + DBUG_PRINT("sparrow_coalescing_control_task", ("Destroying task %p for schema %s", this, schema_.c_str())); + } + + virtual bool operator == (const CoalescingControlTaskPerDB& right) const { + return schema_ == right.schema_; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); + + static void disable(const uint32_t timeout, const Str& schema, bool wait); + + static bool isDisabled(const Str& schema); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PurgeModeControlTaskPerDB 
+////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PurgeModeControlTaskPerDB; +typedef SYSpVector PurgeModeControlTasks; + +class PurgeModeControlTaskPerDB : public Task { +private: + + static Lock lock_; + static PurgeModeControlTasks tasks_; + Str schema_; + PurgeMode mode_; + +public: + + PurgeModeControlTaskPerDB(const Str& schema) : Task(Worker::getQueue()), schema_(schema) {;} + + PurgeModeControlTaskPerDB(const Str& schema, PurgeMode mode) : Task(Worker::getQueue()), schema_(schema), mode_(mode) {;} + + virtual bool operator == (const PurgeModeControlTaskPerDB& right) const { + return schema_ == right.schema_; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); + + static void switchMode(const uint32_t timeout, const Str& schema, PurgeMode mode); + + static PurgeMode getMode(const Str& schema); +}; + +} + +#endif /* #ifndef _engine_listener_h_ */ diff --git a/storage/sparrow/engine/lock.h b/storage/sparrow/engine/lock.h new file mode 100644 index 000000000000..c7101742fcb1 --- /dev/null +++ b/storage/sparrow/engine/lock.h @@ -0,0 +1,399 @@ +/* + Lock types. +*/ + +#ifndef _engine_lock_h_ +#define _engine_lock_h_ + +#include +//#include +#include "my_sys.h" +#include "my_systime.h" +#include "mysql/service_mysql_alloc.h" // my_free +//#include "thr_mutex.h" +#include "mysql/psi/mysql_mutex.h" +#include "mysql/psi/mysql_rwlock.h" + +#include "list.h" + +namespace Sparrow { + +#define PFS_MAX_INFO_NAME_LENGTH 128 +#define PFS_MAX_OS_NAME_LENGTH (16 - 3) + + +// Simple lock. +class Lock { +private: + + PSI_mutex_key key_; + PSI_mutex_info info_; + mysql_mutex_t lock_; + bool static_; + bool initialized_; + //static pthread_mutex_t registerLock_; + //static bool registerLockInitialized_; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + + static native_mutex_t* initializeRegisterLock() { + native_mutex_t* registerLock = new native_mutex_t(); + native_mutex_init(registerLock, MY_MUTEX_INIT_FAST); + return registerLock; + } + + static native_mutex_t* getRegisterLock() { + static native_mutex_t* registerLock = Lock::initializeRegisterLock(); + return registerLock; + } + +public: + + // To make sure registrations into the performance schema are serialized. + static void lockPSI() { + native_mutex_lock(Lock::getRegisterLock()); + } + + static void unlockPSI() { + native_mutex_unlock(Lock::getRegisterLock()); + } + +private: + + void initialize() { +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + Lock::lockPSI(); + //PSI_server->register_mutex("sparrow", &info_, 1); + mysql_mutex_register("sparrow", &info_, 1); + Lock::unlockPSI(); + //} +#endif + mysql_mutex_init(key_, &lock_, MY_MUTEX_INIT_FAST); + initialized_ = true; + } + + void clear() { + if (info_.m_name != 0) { + if (initialized_) { + mysql_mutex_destroy(&lock_); + } + my_free(const_cast(info_.m_name)); + info_.m_name = 0; + } + } + +public: + + Lock(const bool isStatic, const char* name) : static_(isStatic), initialized_(false) { + info_.m_key = &key_; + //const size_t l = strlen(name); + //name += l > PFS_MAX_INFO_NAME_LENGTH ? 
(l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; + if (static_) { + Lock::getStatics().append(this); + } else { + initialize(); + } + } + + Lock &operator=(const Lock &) = delete; + Lock(const Lock &) = delete; + + static void initializeStatics() { + SYSslistIterator iterator(Lock::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(Lock::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~Lock() { + clear(); + } + + void lock() { + mysql_mutex_lock(&lock_); + } + + bool tryLock() { + if (mysql_mutex_trylock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + void unlock() { + mysql_mutex_unlock(&lock_); + } + + const char* getName() const { + return info_.m_name; + } + + mysql_mutex_t* get() { + return &lock_; + } +}; + +// Single writer, multiple readers lock. +class RWLock { +private: + + PSI_rwlock_key key_; + PSI_rwlock_info info_; + mysql_rwlock_t lock_; + bool static_; + +private: + + static SYSslist& getStatics() { + static SYSslist statics; + return statics; + } + + void initialize() { +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + Lock::lockPSI(); + mysql_rwlock_register("sparrow", &info_, 1); + Lock::unlockPSI(); + //} +#endif + mysql_rwlock_init(key_, &lock_); + } + + void clear() { + if (info_.m_name != 0) { + mysql_rwlock_destroy(&lock_); + my_free(const_cast(info_.m_name)); + info_.m_name = 0; + } + } + +public: + + RWLock(const bool isStatic, const char* name) : static_(isStatic) { + info_.m_key = &key_; + //const size_t l = strlen(name); + //name += l > PFS_MAX_INFO_NAME_LENGTH ? (l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; + if (static_) { + RWLock::getStatics().append(this); + } else { + initialize(); + } + } + + RWLock &operator=(const RWLock &) = delete; + RWLock(const RWLock &) = delete; + + + static void initializeStatics() { + SYSslistIterator iterator(RWLock::getStatics()); + while (++iterator) { + iterator.key()->initialize(); + } + } + + static void deinitializeStatics() { + SYSslistIterator iterator(RWLock::getStatics()); + while (++iterator) { + iterator.key()->clear(); + } + } + + ~RWLock() { + clear(); + } + + void readLock() { + mysql_rwlock_rdlock(&lock_); + } + + void writeLock() { + mysql_rwlock_wrlock(&lock_); + } + + bool tryReadLock() { + if (mysql_rwlock_tryrdlock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + bool tryWriteLock() { + if (mysql_rwlock_trywrlock(&lock_) == 0) { + return true; + } else { + return false; + } + } + + void unlock() { + mysql_rwlock_unlock(&lock_); + } +}; + +// Simple lock guard. +class Guard { +private: + + Lock* lock_; + bool acquired_; + +public: + + Guard(Lock& lock, const bool doTry = false) : lock_(&lock) { + if (doTry) { + acquired_ = lock_->tryLock(); + } else { + lock_->lock(); + acquired_ = true; + } + } + + Guard() : lock_(0), acquired_(false) { + } + + ~Guard() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + } + } + + bool isAcquired() const { + return acquired_; + } + +private: + + Guard& operator = (const Guard&); + Guard(const Guard&); +}; + +// Read lock guard. 
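+// Illustrative usage (the lock name below is only an example):
+//   ReadGuard guard(someRWLock);           // blocks until the read lock is held
+//   ReadGuard tryGuard(someRWLock, true);  // try-lock variant; check isAcquired()
+// The destructor releases the lock if it was acquired.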
+class ReadGuard { +private: + + RWLock* lock_; + bool acquired_; + +public: + + ReadGuard(RWLock& lock, const bool doTry = false, const bool acquire=true) : lock_(&lock), acquired_(false) { + if (acquire) { + if (lock_ != 0) { + if (doTry) { + acquired_ = lock_->tryReadLock(); + } else { + lock_->readLock(); + acquired_ = true; + } + } + } + } + + ReadGuard() : lock_(0), acquired_(false) { + } + + void reset() { + acquired_ = false; + } + + ~ReadGuard() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + } + } + + bool isAcquired() const { + return acquired_; + } + +private: + + ReadGuard& operator = (const ReadGuard&); + ReadGuard(const ReadGuard&); +}; + +// Write lock guard. +class WriteGuard { +private: + + RWLock* lock_; + bool acquired_; + +public: + + WriteGuard(RWLock& lock, const bool doTry=false, const bool favorRead=false, const bool acquire=true) : lock_(&lock), acquired_(false) { + if (acquire) { + if (doTry) { + acquired_ = lock_->tryWriteLock(); + } else { + if (favorRead) { + while ( !lock_->tryWriteLock() ) { + my_sleep( 1000 ); + } + acquired_ = true; + } else { + lock_->writeLock(); + acquired_ = true; + } + } + } + } + + WriteGuard() : lock_(0), acquired_(false) { + } + + void acquire() { + if (!acquired_ && lock_ != 0) { + lock_->writeLock(); + acquired_ = true; + } + } + + void release() { + if (acquired_ && lock_ != 0) { + lock_->unlock(); + acquired_ = false; + } + } + + ~WriteGuard() { + release(); + } + + bool isAcquired() const { + return acquired_; + } + +private: + + WriteGuard& operator = (const WriteGuard&); + WriteGuard(const WriteGuard&); +}; + +} + +#endif /* #ifndef _engine_lock_h_ */ diff --git a/storage/sparrow/engine/log.h b/storage/sparrow/engine/log.h new file mode 100644 index 000000000000..7af1501e93f7 --- /dev/null +++ b/storage/sparrow/engine/log.h @@ -0,0 +1,45 @@ +#ifndef _engine_log_h_ +#define _engine_log_h_ + +#include "mysqld_error.h" +#include "mysql/components/services/log_builtins.h" + +namespace Sparrow { + +void spw_print_msg(loglevel level, const char* str_format, ...) MY_ATTRIBUTE((format(printf, 2, 3))); + +inline void spw_print_msg(loglevel level, const char* str_format, ...) +{ + va_list args; + va_start (args, str_format); + char buf[LOG_BUFF_MAX]; + std::vsnprintf(buf, LOG_BUFF_MAX, str_format, args); + LogErr(level, ER_LOG_PRINTF_MSG, buf); + va_end (args); +} + +} + +#define spw_print_system(str_format, ...) \ +do { \ + spw_print_msg(SYSTEM_LEVEL, str_format, ##__VA_ARGS__); \ +} while (0); + +#define spw_print_information(str_format, ...) \ +do { \ + spw_print_msg(INFORMATION_LEVEL, str_format, ##__VA_ARGS__); \ +} while (0); + +#define spw_print_warning(str_format, ...) \ +do { \ + spw_print_msg(WARNING_LEVEL, str_format, ##__VA_ARGS__); \ +} while (0); + +#define spw_print_error(str_format, ...) \ +do { \ + spw_print_msg(ERROR_LEVEL, str_format, ##__VA_ARGS__); \ +} while (0); + + + +#endif // _engine_log_h_ diff --git a/storage/sparrow/engine/master.cc b/storage/sparrow/engine/master.cc new file mode 100644 index 000000000000..2fff12de5a93 --- /dev/null +++ b/storage/sparrow/engine/master.cc @@ -0,0 +1,2163 @@ +/* + Master file. 
+*/ + +#include "master.h" +#include "transient.h" +#include "persistent.h" +#include "internalapi.h" +#include "alter.h" +#include "coalescing.h" +#include "../handler/hasparrow.h" +#include "purge.h" +#include "hash.h" +#include "fileutil.h" +#include "listener.h" + +//#include "../api/api_assert.h" + +#include "../engine/log.h" +#include "sql/mysqld.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Master +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// File version history: +// 1 Initial version. +// 2 No longer store column names +// (added class ColumnWithName to serialize name - necessary in Sparrow API). +// 3 Replace max table size by max lifetime. +// 4 Add aggregation period. +// 5 Add auto-incremental column. +// 6 Add index and column names. Ability to alter indexes. +// 7 No longer store column size in master file. +// 8 Multiple filesystems: store filesystem index with each partition. +// 9 Ability to alter indexes. +// 10 Ability to alter indexes, continued. +// 11 Remove useless Index::id_. +// 12 Add flag Index::dropped_. +// 13 Add PersistentPartition::version_. +// 14 Add coalescing period. +// 15 Persist alter duration. +// 16 Table-specific default WHERE time period. +// 17 Ability to add/remove columns. +// 18 Textual default value on columns. +// 19 Replace drop flag by drop serial number on columns. +// 20 Add data serial number. +// 21 Table-specific string optimization size. +// 22 Add skipped columns ids to each partition +// 23 Add data compression +// 24 Store timestamp precision in the column's attribute 'info' +const uint32_t Master::currentVersion_ = 24; + +//pthread_key(TABLE_SHARE*, Master::threadKey_); +thread_local TABLE_SHARE* Master::threadKey_{nullptr}; + +// STATIC +void Master::initialize() { + Master::threadKey_ = nullptr; +} + +Master::Master(const char* database, const char* table, const bool key /* = false */) + : id_(0), lock_(0), updateLock_(0), canUpdate_(0), updateOnGoing_(0), updating_(false), updateBlockers_(0), flushLock_(0), canFlush_(0), + database_(database, !key), table_(table, !key), dnsConfiguration_(0), indexAlterSerial_(0), indexAlterStarted_(0), indexAlterElapsed_(0), + coalescingPeriod_(3600000), defaultWhere_(0), stringOptimization_(0), coalescingTimestamp_(0), + version_(Master::currentVersion_), depLock_(0), depCond_(0) { + if (!isKey()) { + setup(true); + } +} + +void Master::setup(const bool full) { + database_ = Str(database_.c_str()); + table_ = Str(table_.c_str()); + id_ = MasterId::newId(); + MasterId::insert(this); + database_.toLower(); + table_.toLower(); + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::lock_", database_.c_str(), table_.c_str()); + lock_ = new RWLock(false, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::updateLock_", database_.c_str(), table_.c_str()); + updateLock_ = new Lock(false, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::canUpdate_", database_.c_str(), table_.c_str()); + canUpdate_ = new Cond(false, *updateLock_, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::updateOnGoing_", database_.c_str(), table_.c_str()); + updateOnGoing_ = new Cond(false, *updateLock_, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::flushLock_", database_.c_str(), table_.c_str()); + flushLock_ = new Lock(false, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::canFlush_", database_.c_str(), table_.c_str()); + canFlush_ = new Cond(false, 
*flushLock_, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::depLock_", database_.c_str(), table_.c_str()); + depLock_ = new Lock(false, tmp); + snprintf(tmp, sizeof(tmp), "Master(%s.%s)::depCond_", database_.c_str(), table_.c_str()); + depCond_ = new Cond(false, *depLock_, tmp); + if (full) { + serial_ = 0ULL; + timeCreated_ = std::time(nullptr); + dataSize_ = 0ULL; + indexSize_ = 0ULL; + records_ = 0ULL; + maxLifetime_ = 0; + aggregationPeriod_ = 0; + autoInc_ = 1; + } + acquireRef(); +} + +void Master::prepareForDeletion() { + SPARROW_ENTER("Master::prepareForDeletion"); + + // Stop dependencies and wait for them to complete. + { + Guard guard(getDepLock()); + SYSidlistIterator iterator(dependencies_); + while (++iterator) { + iterator.key()->stop(); + } + } + while (true) { + Guard guard(getDepLock()); + if (dependencies_.isEmpty()) { + break; + } + depCond_->wait(true); + } + MasterId::remove(this); + + // Close all opened files. + closeFiles(); + + // Detach partitions. + { + WriteGuard guard(getLock()); + for (uint32_t i = 0; i < partitions_.length(); ++i) { + partitions_[i]->detach(); + [[maybe_unused]] bool last_ref = partitions_[i]->releaseRef(); + assert( last_ref == true ); + } + partitions_.clearAndDestroy(); + intervals_.clear(); + transientPartitions_.clear(); + } + releaseRef(); +} + +Master::~Master() { + SPARROW_ENTER("Master::~Master"); + if (!isKey()) { + DBUG_PRINT("sparrow_master", ("Destroying files for table %s.%s", database_.c_str(), table_.c_str())); + + // Make sure this master is no longer seen from the API. + assert(!InternalApi::hashContains(this)); + if (dnsConfiguration_ != 0 && dnsConfiguration_->isStarted()) { + DnsConfiguration::release(dnsConfiguration_.get()); + } + + delete lock_; + delete canUpdate_; + delete updateOnGoing_; + delete updateLock_; + delete depLock_; + + // Delete all subdirectories, data and index files. + char buffer[FN_REFLEN]; + const char* masterFile = getMasterFileName(buffer); + my_delete(masterFile, MYF(0)); + const Filesystems& filesystems = FileUtil::getFilesystems(true); + for (uint32_t i = 0; i < filesystems.length(); ++i) { + const Filesystem& filesystem = *filesystems[i]; + snprintf(buffer, sizeof(buffer), "%s/%s/%s", filesystem.getPath().c_str(), database_.c_str(), table_.c_str()); + char dir[FN_REFLEN]; + if (formatFileName(dir, "", buffer, "")) { + FileUtil::deleteDirectory(dir); + } + } + } +} + +// Close files currently opened in this table. +void Master::closeFiles() { + const uint32_t length = partitions_.length(); + for (uint32_t i = 0; i < length; ++i) { + const Partition& partition = *partitions_[i]; + if (partition.isTransient()) { + continue; + } + const PersistentPartition& persistentPartition = static_cast(partition); + char name[FN_REFLEN]; + FileCache::releaseFile(FileId(persistentPartition.getFileName(DATA_FILE, name), FILE_TYPE_DATA, FILE_MODE_READ), false); + FileCache::releaseFile(FileId(persistentPartition.getFileName(STRING_FILE, name), FILE_TYPE_STRING, FILE_MODE_READ), false); + const uint32_t nbIndexes = getIndexes().length(); + for (uint32_t i = 0; i < nbIndexes; ++i) { + FileCache::releaseFile(FileId(persistentPartition.getFileName(i, name), FILE_TYPE_INDEX, FILE_MODE_READ), false); + } + } +} + +// Rename this table. 
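+// The implementation below first releases every cached file handle of the persistent
+// partitions (closeFiles()), then moves the <database>.<table>.spm master file, and finally
+// moves each filesystem's <path>/<database>/<table> data directory, creating the
+// destination's parent directories when needed. Files and directories that do not exist are
+// simply skipped, so a table with no on-disk files yet is renamed without touching the disk.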
+void Master::rename(const char* newDatabase, const char* newTable) _THROW_(SparrowException) { + SPARROW_ENTER("Master::rename"); + DBUG_PRINT("sparrow_master", ("Renaming master file %s.%s to %s.%s", database_.c_str(), table_.c_str(), newDatabase, newTable)); + + // Close all opened files. + closeFiles(); + + // Rename master file. + char from[FN_REFLEN]; + char to[FN_REFLEN]; + const char* database = database_.c_str(); + const char* table = table_.c_str(); + Master::getMasterFileName(from, database, table, true); + if (FileUtil::doesFileExist(from)) { + Master::getMasterFileName(to, newDatabase, newTable, true); + FileUtil::rename(from, to); + } + + // Rename data directories. + const Filesystems& filesystems = FileUtil::getFilesystems(true); + for (uint32_t i = 0; i < filesystems.length(); ++i) { + const Filesystem& filesystem = *filesystems[i]; + snprintf(from, sizeof(from), "%s/%s/%s", filesystem.getPath().c_str(), database, table); + char dirFrom[FN_REFLEN]; + snprintf(to, sizeof(to), "%s/%s/%s", filesystem.getPath().c_str(), newDatabase, newTable); + char dirTo[FN_REFLEN]; + if (formatFileName(dirFrom, "", from, "") && formatFileName(dirTo, "", to, "")) { + dirFrom[strlen(dirFrom) - 1] = 0; + dirTo[strlen(dirTo) - 1] = 0; + if (FileUtil::doesFileExist(dirFrom)) { + FileUtil::createDirectories( dirTo ); + FileUtil::rename(dirFrom, dirTo); + } + } + } + + // Change internal database and table names. + database_ = Str(newDatabase); + table_ = Str(newTable); +} + +PersistentPartitionGuard Master::findMainPartition( uint64_t serial, TimePeriod period, uint32_t columnAlterSerial, uint32_t indexAlterSerial, const ColumnIds& emptyColumnIds ) { + SPARROW_ENTER("Master::findMainPartition"); + const uint64_t coalescingPeriod = getCoalescingPeriod(); + const TimePeriod tperiod = period; + if (coalescingPeriod != 0) { + const uint64_t tmin = tperiod.getMin(); + const uint64_t cmin = tmin - tmin % coalescingPeriod; + const uint64_t cmax = cmin + coalescingPeriod; + const TimePeriod cperiod(&cmin, &cmax, true, false); +#ifndef NDEBUG + const Str coales = Str::fromDuration( coalescingPeriod ); + const Str trs_period = Str::fromTimePeriod(tperiod); + const Str prs_period = Str::fromTimePeriod(cperiod); + DBUG_PRINT("sparrow_transient", ("Transient partition %llu: coalescing p %s, %s --> %s", + static_cast(serial), coales.c_str(), trs_period.c_str(), prs_period.c_str())); +#endif + + PersistentPartition* pp = NULL; + uint32_t best_value = 0xFFFFFFFFUL; + Intervals intervals; + intervals_.findOverlaps(cperiod, intervals); + for (uint32_t i = 0; i < intervals.length(); ++i) { + PersistentPartition* p = static_cast(intervals[i]); + if (p->isMain() && p->getVersion() >= PersistentPartition::appendVersion_ + && p->getColumnAlterSerial() == columnAlterSerial && p->getSkippedColumns().contains(emptyColumnIds)) { + if (best_value > emptyColumnIds.entries() - p->getSkippedColumns().entries()) { + best_value = emptyColumnIds.entries() - p->getSkippedColumns().entries(); + pp = p; + if (best_value == 0) + break; + } + } + } + if (best_value <= sparrow_column_optimisation_lvl) { + assert(pp != NULL); + const TimePeriod pperiod = pp->getPeriod(); +#ifndef NDEBUG + TimePeriod period_union = pperiod.makeUnion(cperiod); + const Str pperiod_str = Str::fromTimePeriod(pperiod); + const Str period_union_str = Str::fromTimePeriod(period_union); + DBUG_PRINT("sparrow_transient", ("Main persisted partition %s.%s.%lu: %s =?= %s", getDatabase().c_str(), + getTable().c_str(), pp->getSerial(), pperiod_str.c_str(), 
period_union_str.c_str())); +#endif + if (pperiod.makeUnion(cperiod) == cperiod) { +#ifndef NDEBUG + const Str speriod = Str::fromTimePeriod(tperiod); + const Str mperiod = Str::fromTimePeriod(pperiod); + DBUG_PRINT("sparrow_transient", ("Found main partition for %s.%s.%llu %s: %llu %s", getDatabase().c_str(), + getTable().c_str(), static_cast(serial), speriod.c_str(), static_cast(pp->getSerial()), mperiod.c_str())); +#endif + return PersistentPartitionGuard(pp); + } + } + } + + // No main partition found; create new one. + PersistentPartition* mainPartition = newPersistentPartition(PersistentPartition::currentVersion_, SAME_AS_SERIAL, + FileUtil::chooseFilesystem(false), tperiod, 0, indexAlterSerial, columnAlterSerial, 0, 0, emptyColumnIds); + mainPartition->acquireRef(); +#ifndef NDEBUG + const Str speriod = Str::fromTimePeriod(tperiod); + DBUG_PRINT("sparrow_transient", ("Did not find main partition for %s.%s.%llu %s: created new with serial %llu", getDatabase().c_str(), + getTable().c_str(), static_cast(serial), speriod.c_str(), static_cast(mainPartition->getSerial()))); +#endif + return PersistentPartitionGuard(mainPartition); +} + +// Add to a vector all partitions in a given time period. +void Master::getPartitionsForTimePeriod(const TimePeriod& period, ReferencedPartitions& entries, QueryInfo& queryInfo) const { + SPARROW_ENTER("Master::getPartitionsForTimePeriod"); + + // Find overlaps in persistent partitions. + if (period.isVoid()) { + return; + } + Intervals intervals; + intervals_.findOverlaps(period, intervals); + for (uint32_t i = 0; i < intervals.length(); ++i) { + PersistentPartition* partition = static_cast(intervals[i]); + PartitionGuard partitionGuard(partition); + if (!entries.contains(partitionGuard) && partition->getPeriod().intersects(period) + && (!partition->isMain() || partition->getVersion() < PersistentPartition::appendVersion_)) { +#ifndef NDEBUG + const Str speriod = Str::fromTimePeriod(partitionGuard->getPeriod()); + DBUG_PRINT("sparrow_context", ("Load partition %llu: %s", static_cast(partitionGuard->getSerial()), speriod.c_str())); +#endif + entries.insert(partitionGuard); + } + } + + // Find overlaps in transient partitions. + for (uint32_t i = 0; i < transientPartitions_.length(); ++i) { + TransientPartition* partition = transientPartitions_[i]; + if (period.intersects(partition->getPeriod())) { + PartitionGuard partitionGuard(partition); + if (!entries.contains(partitionGuard)) { + // Snapshot the current number of transient rows. + if (queryInfo.snapshotTransientPartition(partition)) { + // Insert only partitions with data. + entries.insert(partitionGuard); + } + } + } + } +} + +// STATIC +const char* Master::getMasterFileName(char* buffer, const char* database, const char* table, + const bool appendExtension) _THROW_(SparrowException) { + SPARROW_ENTER("Master::getMasterFileName"); + if (!formatFileName(buffer, table, database, appendExtension ? 
".spm" : "")) { + throw SparrowException::create(false, "Cannot build master file name for table %s.%s", + database, table); + } + return buffer; +} + +// STATIC +bool Master::formatFileName(char* to, const char* name, const char* dir, const char *extension) { + char tmp[FN_REFLEN]; + if (!test_if_hard_path(dir)) { + strxnmov(tmp, sizeof(tmp) - 1, mysql_real_data_home, dir, NullS); + dir = tmp; + } + return fn_format(to, name, dir, extension, MY_APPEND_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH) != 0; +} + +const char* Master::getMasterFileName(char* buffer) const _THROW_(SparrowException) { + SPARROW_ENTER("Master::getMasterFileName"); + return getMasterFileName(buffer, database_.c_str(), table_.c_str(), true); +} + +const char* Master::getDataDirectory(const uint32_t filesystem, char* buffer) const _THROW_(SparrowException) { + SPARROW_ENTER("Master::getDataDirectory"); + char subdir[FN_REFLEN]; + snprintf(subdir, sizeof(subdir), "%s/%s/%s/", FileUtil::getFilesystemPath(filesystem), database_.c_str(), table_.c_str()); + char tmp[FN_REFLEN]; + const char* dir = subdir; + if (!test_if_hard_path(dir)) { + strxnmov(tmp, sizeof(tmp) - 1, mysql_real_data_home, dir, NullS); + dir = tmp; + } + if (fn_format(buffer, "", dir, "", MY_SAFE_PATH) == 0) { + throw SparrowException::create(false, "Cannot build directory name for table %s.%s", + getDatabase().c_str(), getTable().c_str()); + } + return buffer; +} + +// STATIC +bool Master::checkForCorruption(struct tm& t) { + if (t.tm_year < 100 || t.tm_year > 150 + || t.tm_mday < 1 || t.tm_mday > 31 + || t.tm_mon < 0 || t.tm_mon > 11 ) { + char t_str[255]; + strftime(t_str, sizeof(t_str), "%Y-%m-%d %H:%M:%S", &t); + spw_print_error("bad partition timestamp: %s", t_str); + return false; + } + return true; +} + +// Build data/index file name for a given time period and file id. +const char* Master::getFileName(const uint32_t version, const uint32_t filesystem, const TimePeriod& period, + const uint32_t fileId, const uint64_t serial, const uint64_t dataSerial, char* buffer) const _THROW_(SparrowException) { + SPARROW_ENTER("Master::getFileName"); + + // Initialize all locals here to avoid gcc error because of goto. 
+ char subdir[FN_REFLEN]; + time_t start; + struct tm t; + char filename[FN_REFLEN]; + const char* extension; + uint32_t lowMinutes; + uint32_t lowSeconds; + uint32_t lowMilliseconds; + uint32_t upMinutes; + uint32_t upSeconds; + uint32_t upMilliseconds; + int l1 = snprintf(subdir, sizeof(subdir), "%s/%s/%s/", FileUtil::getFilesystemPath(filesystem), database_.c_str(), table_.c_str()); + if (l1 <= 0) { + goto internalError; + } + start = static_cast(period.getMin() / 1000); + if (gmtime_r(&start, &t) == 0) { + goto internalError; + } + if (checkForCorruption(t) == false) { + goto internalError; + } + if (strftime(subdir + l1, sizeof(subdir) - l1, "%Y%m%d/%H", &t) == 0) { + goto internalError; + } + lowMinutes = static_cast((period.getMin() % 3600000) / 60000); + lowSeconds = static_cast((period.getMin() % 60000) / 1000); + lowMilliseconds = static_cast(period.getMin() % 1000); + upMinutes = static_cast((period.getMax() % 3600000) / 60000); + upSeconds = static_cast((period.getMax() % 60000) / 1000); + upMilliseconds = static_cast(period.getMax() % 1000); + if (fileId == DATA_FILE) { + if (version >= PersistentPartition::appendVersion_) { + if (snprintf(filename, sizeof(filename), "%010llu", static_cast(dataSerial)) <= 0) { + goto internalError; + } + } else { + if (snprintf(filename, sizeof(filename), "%010llu_%02u%02u%03u_%02u%02u%03u", + static_cast(serial), lowMinutes, lowSeconds, lowMilliseconds, upMinutes, upSeconds, upMilliseconds) <= 0) { + goto internalError; + } + } + extension = ".spd"; + } else if (fileId == STRING_FILE) { + if (snprintf(filename, sizeof(filename), "%010llu", static_cast(dataSerial)) <= 0) { + goto internalError; + } + extension = ".sps"; + } else { + if (version >= PersistentPartition::appendVersion_) { + if (snprintf(filename, sizeof(filename), "%010llu_%010llu_%02d", static_cast(dataSerial), static_cast(serial), fileId) <= 0) { + goto internalError; + } + } else { + if (snprintf(filename, sizeof(filename), "%010llu_%02u%02u%03u_%02u%02u%03u_%02d", + static_cast(serial), lowMinutes, lowSeconds, lowMilliseconds, upMinutes, upSeconds, upMilliseconds, fileId) <= 0) { + goto internalError; + } + } + extension = ".spi"; + } + if (!formatFileName(buffer, filename, subdir, extension)) { + goto internalError; + } + return buffer; +internalError: + const char* type = "index"; + if (fileId == DATA_FILE) { + type = "data"; + } else if (fileId == STRING_FILE) { + type = "string"; + } + throw SparrowException::create(false, "Cannot build %s file name for table %s.%s", + type, getDatabase().c_str(), getTable().c_str()); +} + +// STATIC +Master* Master::fromDisk(const char* database, const char* table, TABLE_SHARE* s) _THROW_(SparrowException) { + SPARROW_ENTER("Master::fromDisk"); + Master::threadKey_ = s; + char filename[FN_REFLEN]; + Master* master = new Master(database, table, true); + AutoPtr guard(master); + master->getMasterFileName(filename); + DBUG_PRINT("sparrow_master", ("Reading master %s.%s from file %s", database, table, filename)); + if (!FileUtil::doesFileExist(filename)) { + return 0; + } + FileReader reader(filename); + reader >> *master; + guard.release(); + master->setup(false); + return master; +} + +// Set update time and write master file to disk. 
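+// toDisk() stamps timeUpdated_ and rewrites the whole .spm file through the serialization
+// operator defined further down in this file; its counterpart fromDisk() above returns 0
+// (rather than throwing) when no .spm exists, presumably so callers can tell "not a Sparrow
+// table" apart from a genuine read error.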
+void Master::toDisk() _THROW_(SparrowException) { + SPARROW_ENTER("Master::toDisk"); + timeUpdated_ = std::time(nullptr); + char filename[FN_REFLEN]; + getMasterFileName(filename); + DBUG_PRINT("sparrow_master", ("Writing master %s.%s to file %s", database_.c_str(), table_.c_str(), filename)); + FileWriter writer(filename, FILE_TYPE_MISC, FILE_MODE_CREATE); + writer << *this; + writer.write(); +} + +TransientPartitionGuard Master::getTransientPartition( const uint64_t& timestamp ) { + SPARROW_ENTER("Master::getTransientPartition"); + WriteGuard guard(getLock()); + TransientPartition* partition = NULL; + const Str ts_str = Str::fromTimestamp(timestamp); + if ( coalescingPeriod_ == 0 || timestamp == 0 ) { + TransientPartition* part = transientPartitions_.isEmpty() ? 0 : transientPartitions_.last(); + if ( !part ) { + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. No transient part.", database_.c_str(), table_.c_str(), ts_str.c_str())); + } + else if ( part->isDone() ) { + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. But last is done.", database_.c_str(), table_.c_str(), ts_str.c_str())); + } else { + partition = part; + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. Using last %llu.", + database_.c_str(), table_.c_str(), ts_str.c_str(), static_cast(partition->getSerial()))); + } + } else { + for ( int i=transientPartitions_.entries()-1; i>=0; --i ) { + TransientPartition* part = transientPartitions_[i]; + if ( part->isDone() ) + continue; + TimePeriod period = part->getPeriod(); + if ( period.getMin() == 0 || period.getMin() == ULLONG_MAX ) { + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. Part %llu has no timestamp. Ignoring.", + database_.c_str(), table_.c_str(), ts_str.c_str(), static_cast(part->getSerial()))); + } else { + uint64_t low = period.getMin(); + low -= low%coalescingPeriod_; + uint64_t up = low + coalescingPeriod_; + TimePeriod period_adj( &low, &up, true, false ); + if ( period_adj.contains( timestamp ) ) { + partition = part; + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. Using existing one, %llu.", + database_.c_str(), table_.c_str(), ts_str.c_str(), static_cast(partition->getSerial()))); + break; + } + } + } + } + if ( partition == 0 ) { + partition = new TransientPartition(this, serial_++); + partition->acquireRef(); + assert(!partitions_.contains(partition)); + partitions_.insert(partition); + transientPartitions_.insert(partition); + DBUG_PRINT("sparrow_transient", ("Get transient partition for %s.%s, for %s. 
Created new one, %llu.", + database_.c_str(), table_.c_str(), ts_str.c_str(), static_cast(partition->getSerial()))); + } + return TransientPartitionGuard(partition); +} + +void Master::getTransientPartitions( TransientPartitions& partitions ) const { + SPARROW_ENTER("Master::getTransientPartitions"); + uint32_t nb = transientPartitions_.length(); + partitions.resize( nb ); + for ( uint i=0; iisTemporary()) { + temporary = true; + } else { + dataSize += partition->getDataSize(); + indexSize += partition->getIndexSize(); + records += partition->getRecords(); + if (!partition->isMain()) { + partition->getMainPartition()->removeChildPartition(partition); + } + } + DBUG_PRINT("sparrow_master", ("Removing partition %s.%s.%llu", + getDatabase().c_str(), getTable().c_str(), static_cast(partition->getSerial()))); + partition->releaseRef(); + } + setDataSize(getDataSize() - dataSize); + setIndexSize(getIndexSize() - indexSize); + setRecords(getRecords() - records); + return temporary; +} + +void Master::listPartitionsForMain(const PersistentPartition& mainPartition, PersistentPartitions& partitions) const _THROW_(SparrowException) { + SPARROW_ENTER("Master::listPartitionsForMain"); + const uint64_t dataSerial = mainPartition.getSerial(); + if (mainPartition.getVersion() >= PersistentPartition::appendVersion_) { + for (uint32_t i = 0; i < partitions_.length(); ++i) { + Partition* p = partitions_[i]; + if (!p->isMain() && p->getDataSerial() == dataSerial && !p->isTemporary()) { + if (p->isTransient() || coalescedSerials_.contains(p->getSerial())) { + // Cannot purge if main partition is referenced by a transient partition + // or if the partition is being coalesced. + partitions.clear(); + if (p->isTransient()) { + throw SparrowException::create(false, "Cannot remove partition %s.%s.%llu because it is still used for data insertion", + getDatabase().c_str(), getTable().c_str(), static_cast(p->getSerial())); + } else { + throw SparrowException::create(false, "Cannot remove partition %s.%s.%llu because it is being coalesced", + getDatabase().c_str(), getTable().c_str(), static_cast(p->getSerial())); + } + } + partitions.insert(PersistentPartitionGuard(static_cast(p))); + } + } + } else if (coalescedSerials_.contains(dataSerial)) { + throw SparrowException::create(false, "Cannot remove partition %s.%s.%llu because it is being coalesced", + getDatabase().c_str(), getTable().c_str(), static_cast(dataSerial)); + } + partitions.insert(PersistentPartitionGuard(const_cast(&mainPartition))); +} + +void Master::removePartitions(const TimePeriod& period) _THROW_(SparrowException) { + SPARROW_ENTER("Master::removePartitions"); + PersistentPartitions partitions; + { + WriteGuard guard(getLock()); + Intervals intervals; + intervals_.findOverlaps(period, intervals); + for (uint32_t i = 0; i < intervals.length(); ++i) { + PersistentPartition* partition = static_cast(intervals[i]); + if (partition->isMain() && partition->getPeriod().intersects(period)) { + PersistentPartitionGuard partitionGuard(partition); + if (!partitions.contains(partitionGuard)) { + listPartitionsForMain(*partition, partitions); + } + } + } + removePartitions(partitions); + toDisk(); + } +} + +bool Master::forceFlush() { + SPARROW_ENTER("Master::forceFlush"); + //WriteGuard guard(getLock()); + TransientPartitions transientPartitions; + { + ReadGuard guard(getLock()); + getTransientPartitions( transientPartitions ); + } + return forceFlushNoLock( transientPartitions ); +} + +bool Master::forceFlushNoLock( const TransientPartitions& 
transientPartitions, bool master_lock_taken ) { + SPARROW_ENTER("Master::forceFlushNoLock"); + if (transientPartitions.isEmpty()) { + return false; + } + bool result = false; + uint32_t i = transientPartitions.length(); + do { + TransientPartition* partition = transientPartitions[--i]; + if (partition->forceFlush( master_lock_taken )) { + result = true; + } + } while (i > 0); + return result; +} + +void Master::waitForFlush() { + SPARROW_ENTER("Master::waitForFlush"); + for (;;) { + { + ReadGuard guard(getLock()); + if (transientPartitions_.isEmpty()) { + return; + } + } + my_sleep(100000); + } +} + +void Master::waitForFlush(const PartitionIds& flushedPartitions) { + SPARROW_ENTER("Master::waitForFlush"); + for (;;) { + { + ReadGuard guard(getLock()); + uint32_t i, j, nb = transientPartitions_.length(); + uint32_t nbFlushed = flushedPartitions.length(); + for (i=0; igetSerial(); + for (j=0; j validColumns.length() ) { + throw SparrowException::create(false, "Cannot insert data in %s.%s because client app sent too many columns: %u vs %u", + getDatabase().c_str(), getTable().c_str(), colNames.entries(), validColumns.length()); + } + + // Check that the same column is not specified twice. + for (uint32_t i=0; igetRecordsSafe(); + } + return records; +} + +uint64_t Master::getNormalizedSize() const { + const uint64_t age = getAge(); + const double weight = age == 0 ? 0 : static_cast(getMaxLifetime()) / age; + return static_cast(weight * (getDataSize() + getIndexSize())); +} + +bool Master::needToPurge(const uint64_t limit, const uint64_t total, const uint64_t totalNormalized, bool& force, const bool mode) const { + SPARROW_ENTER("Master::needToPurge"); + + ReadGuard guard(getLock()); + + force = false; + if (partitions_.length() <= 1) { + return false; + } + const uint64_t normalizedSize = getNormalizedSize(); + if (normalizedSize == 0) { + return false; + } + + uint64_t age = 0; + if (mode == PURGE_MODE_CONSTANTLY) { + const uint64_t t = Scheduler::now(); + age = getAge(t); + } else { + age = getAge(); + } + if (age > getMaxLifetime()) { + DBUG_PRINT("sparrow_purge", ("Need to purge table %s.%s: max lifetime exceeded (mode %u): %llu > %llu", + getDatabase().c_str(), getTable().c_str(), mode, static_cast(getAge()), static_cast(getMaxLifetime()))); + return true; + } + if (total < limit) { + return false; + } + const double weight = static_cast(normalizedSize) / totalNormalized; + const uint64_t size = getDataSize() + getIndexSize(); + const bool result = size > static_cast(weight * limit); + if ( result ) { + force = true; + } +#ifndef NDEBUG + if (result) { + DBUG_PRINT("sparrow_purge", ("Need to purge table %s.%s: threshold crossed (mode %u) (%llu > %llu, total=%llu, limit=%llu, totalNormalized=%llu, normalizedSize=%llu, weight=%f)", + getDatabase().c_str(), getTable().c_str(), mode, static_cast(size), static_cast(weight * limit), static_cast(total), + static_cast(limit), static_cast(totalNormalized), static_cast(normalizedSize), weight)); + } +#endif + return result; +} + +void Master::logPartitions() const { + + DBUG_PRINT("sparrow_purge", ("Partition list for table %s.%s, ", database_.c_str(), table_.c_str())); + IntervalTreeNode* node = intervals_.getMin(); + while ( node != 0 ) { + PersistentPartition* partition = static_cast(node->getInterval()); + + const uint64_t low = partition->getMin(); + const uint64_t high = partition->getMax(); + const Str low_ts = Str::fromTimestamp(low); + const Str high_ts = Str::fromTimestamp(high); + DBUG_PRINT("sparrow_purge", ("Partition: serial %llu, 
data serial %llu, [low %s, high %s]", + static_cast(partition->getSerial()), static_cast(partition->getDataSerial()), low_ts.c_str(), high_ts.c_str())); + + node = intervals_.getNext( node ); + } +} + +// Purges the oldest persistent main partition. The caller must acquire the write lock of this master file. +// Partitions to purge are returned in a vector of guards so they can be actually deleted outside the lock. +// Partitions currently being coalesced cannot be purged. +// Returns true if the returned partitions still contain valid data (only occurs if force is true, meaning if disk space is too low). +// False if they're beyond their max lifetime. +bool Master::purge(PersistentPartitions& partitions, const bool force, const bool mode) { + SPARROW_ENTER("Master::purge"); + + WriteGuard guard(getLock()); + + IntervalTreeNode* node = intervals_.getMin(); + PersistentPartition* partition = node == 0 ? 0 : static_cast(node->getInterval()); + if (partition == 0) { + return false; + } + + const PersistentPartition& mainPartition = *partition->getMainPartition(); + + // If a transient partition is being flushed to that main partition, do not delete it. + if ( isFlushing(mainPartition.getSerial()) ) { + return false; + } + + bool forced = false; + PersistentPartitions tmp; + try { + listPartitionsForMain(mainPartition, tmp); + + DBUG_PRINT("sparrow_purge", ("Oldest Main partition (i.e. purge candidate) includes: ")); + uint64_t newestToDel = 0; + for (uint32_t i = 0; i < tmp.length(); ++i) { + const PersistentPartition* partition = tmp[i]; + const uint64_t low = partition->getMin(); + const uint64_t high = partition->getMax(); + const Str low_ts = Str::fromTimestamp(low); + const Str high_ts = Str::fromTimestamp(high); + DBUG_PRINT("sparrow_purge", ("Partition: serial %llu, data serial %llu, [low %s, high %s]", + static_cast(partition->getSerial()), static_cast(partition->getDataSerial()), low_ts.c_str(), high_ts.c_str())); + newestToDel = std::max(newestToDel, high); + } + + // Check that destroying oldest partition will not destroy still valid data samples. + const uint64_t high = (mode == PURGE_MODE_CONSTANTLY ? Scheduler::now() : getNewest()); + const uint64_t lifetime = getMaxLifetime(); + uint64_t obsolescence = high - lifetime; + if ( newestToDel >= obsolescence ) { + if (!force) { + const Str obsol_ts = Str::fromTimestamp(obsolescence); + const Str high_ts = Str::fromTimestamp(high); + const Str newestToDel_ts = Str::fromTimestamp(newestToDel); + DBUG_PRINT("sparrow_purge", ("Should not delete oldest partition because it contains data upto %s which is still valid " + "(data lifetime reaches down to %s, newest %s)", newestToDel_ts.c_str(), obsol_ts.c_str(), high_ts.c_str())); + return false; + } + forced = true; + } + + partitions.append(tmp); + removePartitions(tmp); + toDisk(); + } catch(const SparrowException& e) { + // Ignore error. 
+ DBUG_PRINT("sparrow_purge", ("Exception : %s", e.getText())); + } + return forced; +} + +uint64_t Master::listPartitionsForFilesystem(const uint32_t filesystem, PersistentPartitions& partitions, + const uint64_t limit, const uint64_t totalNormalized) const { + SPARROW_ENTER("Master::listPartitionsForFilesystem"); + IntervalTreeNode* node = intervals_.getMin(); + while (node != 0) { + PersistentPartition* partition = static_cast(node->getInterval()); + if (coalescedSerials_.contains(partition->getSerial())) { + break; + } + if (partition->isMain() ) { + try { + listPartitionsForMain(*partition, partitions); + } catch(const SparrowException& e) { + break; + } + if ( partition->getFilesystem() == filesystem ) { + const uint64_t normalizedSize = getNormalizedSize(); + uint64_t delta = 0; + if (normalizedSize != 0 && totalNormalized != 0) { + const uint64_t size = getDataSize() + getIndexSize(); + const double weight = static_cast(normalizedSize) / totalNormalized; + const uint64_t weightedSize = static_cast(weight * limit); + delta = weightedSize > size ? weightedSize - size : 0; + } + #ifndef NDEBUG + const Str ssize(Str::fromSize(partition->getDataSize() + partition->getIndexSize())); + DBUG_PRINT("sparrow_purge", ("Table %s.%s: got %u partitions for file system %u, table size=%s, delta=%llu", database_.c_str(), table_.c_str(), + partitions.length(), filesystem, ssize.c_str(), static_cast(delta))); + #endif + return delta; + } + } + node = intervals_.getNext(node); + } + partitions.clear(); + return 0; +} + +bool Master::purgePartitionsForFilesystem(const PersistentPartitions& partitions) { + SPARROW_ENTER("Master::purgePartitionsForFilesystem"); + for (uint32_t i = 0; i < partitions.length(); ++i) { + PersistentPartition* partition = partitions[i].get(); + if (coalescedSerials_.contains(partition->getSerial())) { + return false; + } + } + removePartitions(partitions); + toDisk(); + return true; +} + +TransientPartitions Master::setDnsConfiguration(const DnsConfiguration& configuration) { + SPARROW_ENTER("Master::setDnsConfiguration"); + if (dnsConfiguration_ != 0 && dnsConfiguration_->isStarted()) { + DnsConfiguration::release(dnsConfiguration_.get()); + } + dnsConfiguration_ = 0; + TransientPartitions partitions; + if (!configuration.isEmpty()) { + dnsConfiguration_ = DnsConfiguration::acquire(configuration); +#ifndef NDEBUG + const Str s(dnsConfiguration_->print()); + DBUG_PRINT("sparrow_dns", ("Set DNS configuration on table %s.%s: %s", getDatabase().c_str(), getTable().c_str(), s.c_str())); +#endif + + // Return transient partitions to update. + uint32_t i = transientPartitions_.length(); + if (i > 0) { + do { + TransientPartition* partition = transientPartitions_[--i]; + partitions.append(TransientPartitionGuard(partition)); + } while (i > 0); + } + } + return partitions; +} + +void Master::repair() { + // Check that persisted partitions are not corrupted. + // Currently, we only check that the .spd is present. If not, the reference to the partition is removed from the master file. 
+ PersistentPartitions toremove; + { + WriteGuard guard(getLock()); + spw_print_information("Starting repair on %s.%s", database_.c_str(), table_.c_str()); + for (uint32_t i = 0; i < partitions_.length(); ++i) { + Partition* p = partitions_[i]; + if ( p->isTransient() || p->isTemporary() ) { + continue; + } + PersistentPartitionGuard pp(static_cast(p)); + char filename[FN_REFLEN]; + pp->getFileName(DATA_FILE, filename); + if ( !FileUtil::doesFileExist( filename ) ) { + toremove.append(pp); + } + } + if ( toremove.isEmpty() ) { + spw_print_information("Finished repair on %s.%s. Everything Ok.", database_.c_str(), table_.c_str()); + } else { + for ( uint i=0; igetFileName(DATA_FILE, filename); + spw_print_information("Partition %s does not exists. Removing.", filename); + } + removePartitions( toremove ); + toDisk(); + spw_print_information("Finished repair on %s.%s: removed %d referenced to non-existing partitions.", database_.c_str(), table_.c_str(), toremove.length()); + } + } +} + +// Deserialization. +ByteBuffer& operator >> (ByteBuffer& buffer, Master& master) { + uint32_t version; + buffer >> version; + buffer.setVersion(version); + DnsConfiguration dnsConfiguration; + buffer >> master.columns_ >> master.indexes_ >> master.indexMappings_ + >> master.foreignKeys_ >> dnsConfiguration >> master.maxLifetime_; + if (version < 3) { + master.maxLifetime_ = 0; + } + if (version < 4) { + master.aggregationPeriod_ = 0; + } else { + buffer >> master.aggregationPeriod_; + } + if (version >= 16) { + buffer >> master.defaultWhere_; + } else { + master.defaultWhere_ = 86400000L; + } + if (version >= 21) { + buffer >> master.stringOptimization_; + } else { + master.stringOptimization_ = 0; + } + if (version < 5) { + master.autoInc_ = 1; + } else { + buffer >> master.autoInc_; + } + if (version < 6) { + // Set column names and charsets from table definition in thread-local storage. 
+ TABLE_SHARE* s = Master::threadKey_; + if (s != 0) { + Columns& columns = master.columns_; + for (uint32_t i = 0; i < columns.length(); ++i) { + const Column& column = columns[i]; + Column col(s->field[i]->field_name, column.getType(), + column.getFlags(), column.getInfo(), s->field[i]->charset()->csname, Str()); + columns[i] = col; + //columns[i] = Column(s->field[i]->field_name, column.getType(), + // column.getFlags(), column.getInfo(), s->field[i]->charset()->csname, Str()); + } + } + } + buffer >> (uint64_t&)master.serial_ >> master.timeCreated_ + >> master.timeUpdated_ >> master.dataSize_ >> master.indexSize_ >> master.records_; + if (version >= 6 && version < 9) { + SYSvector droppedIndexes; + SYSvector addedIndexes; + buffer >> droppedIndexes >> addedIndexes; + uint32_t serial = 1; + for (uint32_t i = 0; i < droppedIndexes.length(); ++i) { + master.indexAlterations_.append(Alteration(ALT_DROP_INDEX, serial++, droppedIndexes[i])); + } + for (uint32_t i = 0; i < addedIndexes.length(); ++i) { + master.indexAlterations_.append(Alteration(ALT_ADD_INDEX, serial++, addedIndexes[i])); + } + master.indexAlterSerial_ = serial - 1; + } + if (version >= 9) { + buffer >> master.indexAlterSerial_; + if (version >= 15) { + buffer >> master.indexAlterElapsed_; + } + buffer >> master.indexAlterations_; + } + if (version >= 14) { + buffer >> master.coalescingPeriod_; + } + master.computeMappedColumnIds(); + uint32_t length; + buffer >> length; + for (uint32_t i = 0; i < length; ++i) { + PersistentPartition* partition = new PersistentPartition(&master); + buffer >> *partition; + + if (partition->makeChecks() == false) { + spw_print_error("Ignoring invalid partition %s.%s.%llu.", master.getDatabase().c_str(), master.getTable().c_str(), static_cast(partition->getSerial())); + continue; + } + + // Discard empty and duplicate partitions, just in case. + if ((partition->getRecords() > 0 || partition->getDataRecords() > 0) && !master.partitions_.contains(partition)) { + if (partition->getSerial() >= master.serial_) { + // Adjust master serial in case of inconsistency. + master.serial_ = partition->getSerial() + 1; + } + master.partitions_.append(partition); + master.intervals_.insert(partition); + } else if (!master.partitions_.isEmpty()) { + // Delete partition only if the master file references at least one partition, + // otherwise the master file is deleted! Better have a small memory leak. + partition->releaseRef(); + delete partition; + } + } + + // Setup pointers to main partitions and compute stats. + uint64_t dataSize = 0; + uint64_t indexSize = 0; + uint64_t records = 0; + Partitions& partitions = master.partitions_; + uint32_t minSerial = UINT_MAX; + for (uint32_t i = 0; i < partitions.length(); ++i) { + PersistentPartition* partition = static_cast(partitions[i]); + PersistentPartition* mainPartition = partition; + if (!partition->isMain()) { + mainPartition = static_cast(master.getPartitionNoLock(partition->getDataSerial()).get()); + } + if (mainPartition == 0) { + // Just in case of bug. + partitions.removeAt(i--); // Wrapping. + master.intervals_.remove(*partition); + + // Delete partition only if the master file references at least one partition, + // otherwise the master file is deleted! Better have a small memory leak. 
+ if (!partitions.isEmpty()) { + partition->releaseRef(); + delete partition; + } + } else { + minSerial = std::min(minSerial, partition->getIndexAlterSerial()); + dataSize += partition->getDataSize(); + indexSize += partition->getIndexSize(); + records += partition->getRecords(); + partition->setMainPartition(mainPartition); + if (!partition->isMain()) { + partition->getMainPartition()->addChildPartition(partition); + } + + } + } + + assert(master.partitions_.isSorted()); + master.dataSize_ = dataSize; + master.indexSize_ = indexSize; + master.records_ = records; + + // Remove obsolete alterations. + for (uint32_t i = 0; i < master.indexAlterations_.length(); ++i) { + const Alteration& alteration = master.indexAlterations_[i]; + if (alteration.getSerial() <= minSerial) { + master.indexAlterations_.removeAt(i--); // Wrapping. + } + } + + // Start DNS configuration. + master.setDnsConfiguration(dnsConfiguration); + return buffer; +} + +// Serialization +ByteBuffer& operator << (ByteBuffer& buffer, const Master& master) { + buffer << master.version_; + buffer << master.columns_ << master.indexes_ << master.indexMappings_ + << master.foreignKeys_; + if (master.dnsConfiguration_ == 0) { + buffer << DnsConfiguration(); + } else { + buffer << *master.dnsConfiguration_.get(); + } + buffer << master.maxLifetime_ << master.aggregationPeriod_ << master.defaultWhere_ + << master.stringOptimization_ << master.autoInc_ << master.serial_ << master.timeCreated_ + << master.timeUpdated_ << master.dataSize_ << master.indexSize_ << master.records_ + << master.indexAlterSerial_ << master.indexAlterElapsed_ << master.indexAlterations_ + << master.coalescingPeriod_; + + // Do not write transient and temporary partitions. + const Partitions& partitions = master.partitions_; + const uint32_t length = partitions.length(); + uint32_t count = 0; + uint32_t i; + for (i = 0; i < length; ++i) { + const Partition& partition = *partitions[i]; + if (!partition.isTransient() && !partition.isTemporary()) { + count++; + } + } + buffer << count; + for (i = 0; i < length; ++i) { + const Partition& partition = *partitions[i]; + if (!partition.isTransient() && !partition.isTemporary()) { + assert(master.serial_ > partition.getSerial()); + buffer << static_cast(partition); + } + } + return buffer; +} + +void Master::retrieve(ByteBuffer& buffer) const { + SPARROW_ENTER("Master::retrieve"); + FileId key; + getMasterFileName(key.getName()); + ReadGuard guard(getLock()); + + // Send the master file size so it can be recreated identically by the caller process + // (remember written files are padded with extra bytes to have a length multiple + // of sector size). + FileCacheGuard fileGuard(FileCache::get(), 0, key, 0, true); + const FileHandle& handle = fileGuard.get()->getValue(); + buffer << handle.getSize(); + buffer << *this; +} + +bool Master::startIndexAlter(const bool check) _THROW_(SparrowException) { + SPARROW_ENTER("Master::startIndexAlter"); + WriteGuard guard(getLock()); + DBUG_PRINT("sparrow_alter", ("startIndexAlter on table %s.%s: check %d, %d, %llu", + getDatabase().c_str(), getTable().c_str(), check, indexAlterSerial_, static_cast(indexAlterStarted_))); + if (!check && (indexAlterSerial_ == 0 || indexAlterStarted_ != 0)) { + // Nothing to do or alterations already started. + return false; + } + if (partitions_.isEmpty()) { + // No partition. 
+ return false; + } + DBUG_PRINT("sparrow_alter", ("Check if index alteration must be started for %s.%s", getDatabase().c_str(), getTable().c_str())); + uint32_t i = partitions_.length() - 1; + for (;;) { + Partition* partition = partitions_[i]; + + // Alter persistent partitions with pending alterations and which are not being coalesced. + if (!partition->isTemporary() && partition->isIndexAlterable() && !partition->isReady() && !coalescedSerials_.contains(partition->getSerial())) { + PersistentPartition* persistentPartition = static_cast(partition); + if (indexAlterStarted_ == 0) { + indexAlterStarted_ = Scheduler::now(); + AlterWorker::initialize(); +#ifndef NDEBUG + const Str stimestamp(Str::fromTimestamp(indexAlterStarted_)); + DBUG_PRINT("sparrow_alter", ("Index alteration started at %s for %s.%s", stimestamp.c_str(), getDatabase().c_str(), getTable().c_str())); +#endif + } + DBUG_PRINT("sparrow_alter", ("Send index alteration task for partition %s.%s.%llu (most recent)", getDatabase().c_str(), getTable().c_str(), + static_cast(persistentPartition->getSerial()))); + Scheduler::addTask(new MainAlterTask(persistentPartition)); + return true; + } + if (i-- == 0) { + // Scanned all partitions. + if (check) { + // This was a check after an alteration sequence: reset alter status. + indexAlterStarted_ = 0; + indexAlterElapsed_ = 0; + } + break; + } + } + DBUG_PRINT("sparrow_alter", ("No index alteration necessary for %s.%s", getDatabase().c_str(), getTable().c_str())); + return false; +} + +void Master::indexAlterationDone() { + const uint64_t now = Scheduler::now(); + if (now > indexAlterStarted_) { + indexAlterElapsed_ += now - indexAlterStarted_; + } + indexAlterStarted_ = now; +} + +bool Master::getIndexAlterStatus(uint64_t& elapsed, uint64_t& left, double& percentage) const { + SPARROW_ENTER("Master::reportAlterStatus"); + if (indexAlterStarted_ == 0) { + return false; + } + uint32_t total = 0; + uint32_t ready = 0; + for (uint32_t i = 0; i < partitions_.length(); ++i) { + const Partition* partition = partitions_[i]; + if (!partition->isTemporary() && partition->isIndexAlterable()) { + total++; + const PersistentPartition* persistentPartition = static_cast(partition); + if (persistentPartition->getIndexAlterSerial() == getIndexAlterSerial()) { + ready++; + } + } + } + if (ready == total || getIndexAlterations().isEmpty()) { + return false; + } + const double achieved = static_cast(ready) / total; + elapsed = indexAlterElapsed_; + if (achieved > 0) { + left = static_cast(elapsed / achieved * (1.0 - achieved)); + if (left > 15 * 86400000) { // Remove meaningless value (> 15 days). 
+ left = 0; + } + } else { + left = 0; + } + percentage = achieved * 100; + return true; +} + +bool Master::getIndexAlterStatus(SYSslist& strings) const { + SPARROW_ENTER("Master::reportAlterStatus"); + uint64_t elapsed; + uint64_t left; + double percentage; + if (getIndexAlterStatus(elapsed, left, percentage)) { + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "%s.%s", getDatabase().c_str(), getTable().c_str()); + strings.append(Str(tmp)); + strings.append(indexAlterations_.first().getDescription(*this)); + strings.append(Str::fromDuration(elapsed)); + if (left > 0) { + strings.append(Str::fromDuration(left)); + } else { + strings.append(Str("N/A")); + } + snprintf(tmp, sizeof(tmp), "%.1f%%", percentage); + strings.append(Str(tmp)); + for (uint32_t i = 1; i < indexAlterations_.length(); ++i) { + strings.append(Str()); + strings.append(indexAlterations_[i].getDescription(*this)); + strings.append(Str()); + strings.append(Str()); + strings.append(Str()); + } + return true; + } else { + return false; + } +} + +void Master::dropColumn(const char* name) _THROW_(SparrowException) { + SPARROW_ENTER("Master::dropColumn"); + const Str n(name); + const uint32_t pos = getColumn(n); + assert(pos != 0 && pos != SYS_NPOS); + columns_[pos].drop(getColumnAlterSerial() + 1); + DBUG_PRINT("sparrow_alter", ("Dropping column %s from table %s.%s", name, getDatabase().c_str(), getTable().c_str())); + computeMappedColumnIds(); +} + +void Master::addColumn(const char* after, Column& newColumn) _THROW_(SparrowException) { + SPARROW_ENTER("Master::addColumn"); + assert(after != first_keyword); + const Str name(after); + uint32_t pos = getColumn(name); + assert(pos != SYS_NPOS); + newColumn.setSerial(getColumnAlterSerial() + 1); + pos++; + const bool last = pos == columns_.length(); + columns_.insertAt(pos, newColumn); + DBUG_PRINT("sparrow_alter", ("Adding column %s to table %s.%s at position %u, after column %s", newColumn.getName().c_str(), + getDatabase().c_str(), getTable().c_str(), pos, after)); + if (!last) { + // Update column ids in existing indexes. + DBUG_PRINT("sparrow_alter", ("Update column ids in existing indexes")); + Indexes newIndexes(indexes_.length()); + for (uint32_t i = 0; i < indexes_.length(); ++i) { + const Index& index = indexes_[i]; + const ColumnIds& ids = index.getColumnIds(); + ColumnIds newIds(ids.length()); + for (uint32_t j = 0; j < ids.length(); ++j) { + const uint32_t id = ids[j]; + newIds.append(id >= pos ? 
id + 1 : id); + } + Index newIndex(index.getName().c_str(), newIds, index.isUnique()); + if (index.isDropped()) { + newIndex.drop(); + } + newIndexes.append(newIndex); + } + indexes_ = newIndexes; + } + computeMappedColumnIds(); +} + +void Master::renameColumn(const char* from, const char* to) _THROW_(SparrowException) { + SPARROW_ENTER("Master::renameColumn"); + const Str name(from); + const uint32_t pos = getColumn(name); + assert(pos != SYS_NPOS); + Column& column = columns_[pos]; + column = Column(to, column.getType(), column.getFlags(), column.getInfo(), + column.getCharset().c_str(), column.getDefaultValue()); +} + +PartitionGuard Master::getPartitionNoLock(const uint64_t serial) const { + SPARROW_ENTER("Master::getPartitionNoLock"); + const PartitionKey key(serial); + return PartitionGuard(partitions_.find(&key)); +} + +PartitionGuard Master::getPartition(const uint64_t serial) const { + SPARROW_ENTER("Master::getPartition"); + ReadGuard guard(getLock()); + return getPartitionNoLock(serial); +} + +uint64_t Master::getOldest(const bool persistentOnly /* = false */) const { + uint64_t oldest = 0; + intervals_.getMin(oldest); + if (!persistentOnly) { + for (uint32_t i = 0; i < transientPartitions_.length(); ++i) { + TransientPartition* partition = transientPartitions_[i]; + TimePeriod period{partition->getPeriod()}; + const uint64_t* plow = period.getLow(); + if (plow != 0) { + oldest = oldest == 0 ? *plow : std::min(oldest, *plow); + } + } + } + return oldest; +} + +uint64_t Master::getNewest(const bool persistentOnly /* = false */) const { + uint64_t newest = 0; + intervals_.getMax(newest); + if (!persistentOnly) { + for (uint32_t i = 0; i < transientPartitions_.length(); ++i) { + TransientPartition* partition = transientPartitions_[i]; + TimePeriod period{partition->getPeriod()}; + const uint64_t* phigh = period.getUp(); + if (phigh != 0) { + newest = newest == 0 ? 
*phigh : std::max(newest, *phigh); + } + } + } + return newest; +} + +PersistentPartition* Master::newPersistentPartition(const uint32_t version, const uint64_t dataSerial, const uint32_t filesystem, + const TimePeriod& period, const uint32_t records, const uint32_t indexAlterSerial, const uint32_t columnAlterSerial, + const uint64_t dataRecords, const uint64_t recordOffset, const ColumnIds& emptyColumnIds) { + SPARROW_ENTER("Master::newPersistentPartition"); + const uint64_t serial = serial_++; + PersistentPartition* mainPartition = 0; + if (dataSerial != SAME_AS_SERIAL) { + mainPartition = static_cast(getPartitionNoLock(dataSerial).get()); + } + return new PersistentPartition(version, this, serial, mainPartition, filesystem, + indexAlterSerial, columnAlterSerial, period, records, 0, 0, dataRecords, recordOffset, emptyColumnIds); +} + +PersistentPartition* Master::newTemporaryPersistentPartition(const PersistentPartition& partition, const uint32_t records, const uint64_t recordOffset) { + SPARROW_ENTER("Master::newTemporaryPersistentPartition"); + const uint64_t serial = serial_++; + PersistentPartition* mainPartition = partition.getMainPartition(); + const TimePeriod temporaryPeriod(partition.getPeriod().getMin(), 0); + PersistentPartition* temporary = new PersistentPartition(partition.getVersion(), this, serial, mainPartition, FileUtil::chooseFilesystem(true), + partition.getIndexAlterSerial(), partition.getColumnAlterSerial(), temporaryPeriod, records, 0, 0, 0, recordOffset, partition.getSkippedColumns()); + temporary->acquireRef(); + WriteGuard guard(getLock()); + assert(!partitions_.contains(temporary)); + partitions_.insert(temporary); + intervals_.insert(temporary); + return temporary; +} + +// Removes the transient partition from the list of partitions and add the new persistent partition, newPartition. +// Also add the main partition if it's just been created and update the time intervals. +void Master::mutatePartition(TransientPartition* transientPartition, PersistentPartition* mainPartition, PersistentPartition* newPartition) { + partitions_.remove(transientPartition); + transientPartitions_.remove(transientPartition); + transientPartition->releaseRef(); + if (mainPartition != 0) { + assert(newPartition != 0); + if (partitions_.contains(mainPartition)) { + // Extend period of main partition. 
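+      // The interval tree appears to be keyed on the partition's time period, so the main
+      // partition is taken out, extended, and re-inserted rather than updated in place.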
+ intervals_.remove(*mainPartition); + mainPartition->extendPeriod(newPartition->getPeriod()); + intervals_.insert(mainPartition); + } else { + assert(mainPartition->getRecords() == 0 && mainPartition->getIndexSize() == 0); + partitions_.insert(mainPartition); + intervals_.insert(mainPartition); + } + assert(newPartition->getDataSize() == 0); + newPartition->acquireRef(); + assert(!partitions_.contains(newPartition)); + partitions_.insert(newPartition); + intervals_.insert(newPartition); + mainPartition->addChildPartition(newPartition); + setDataSize(getDataSize() + transientPartition->getDataSize()); + setIndexSize(getIndexSize() + transientPartition->getIndexSize()); + setRecords(getRecords() + transientPartition->getRecords()); + const uint64_t coalescingPeriod = getCoalescingPeriod(); + if (coalescingPeriod != 0) { + uint64_t limit = transientPartition->getPeriodNoLock().getMin(); + limit -= limit % coalescingPeriod; + coalescingTimestamp_ = std::min(limit, coalescingTimestamp_); + } + } +} + +bool Master::hasBuiltInTimestampIndex() const { + if (!indexes_.isEmpty()) { + const Index& index = indexes_[0]; + if (!index.isDropped() && index.getColumnIds().length() == 1 && index.getColumnIds()[0] == 0) { + for (uint32_t i = 0; i < partitions_.length(); ++i) { + const Partition& partition = *partitions_[i]; + if (!partition.isTransient() && !partition.isTemporary() && static_cast(partition).getVersion() == 0) { + return true; + } + } + } + } + return false; +} + +// Find persistent partitions within the given period. The upper bound of the given period is smaller than the newest timestamp. +void Master::getCoalescingCandidates(const TimePeriod& period, CoalescingCandidates& candidates, const bool fake) const { + SPARROW_ENTER("Master::getCoalescingCandidates"); +#ifndef NDEBUG + const Str speriod(Str::fromTimePeriod(period)); + DBUG_PRINT("sparrow_coalescing", ("Search coalescing candidates on %s.%s for period %s", getDatabase().c_str(), + getTable().c_str(), speriod.c_str())); +#endif + candidates.clear(); + Intervals intervals; + intervals_.findOverlaps(period, intervals); + const uint64_t length = period.getLength(); + for (uint32_t i = 0; i < intervals.length(); ++i) { + PersistentPartition* partition = static_cast(intervals[i]); + if (partition->isTemporary() || partition->isTransient()) { + continue; + } + if (!fake && !partition->isReady()) { + // We are interested only in persistent partitions with no alteration on going. + continue; + } + if (!partition->getPeriod().intersects(period)) { + // Reject false positives. + continue; + } + const TimePeriod pperiod = partition->getPeriod(); + if (fake || !coalescedSerials_.contains(partition->getSerial())) { + const bool isAppend = partition->getVersion() >= PersistentPartition::appendVersion_; + PersistentPartitions partitions; + if (isAppend && partition->isMain()) { + // Get all child partitions. 
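+        // For append-version main partitions the data file is already shared, so only the
+        // child partitions (which carry their own index files) are candidates; pre-append
+        // partitions (the else branch below) are taken as a whole, and only when their
+        // period does not exceed the requested coalescing window. Partitions already listed
+        // in coalescedSerials_ are skipped unless the 'fake' flag is set.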
+ const ChildPartitions& childPartitions = partition->getChildPartitions(); + for (uint32_t j = 0; j < childPartitions.length(); ++j) { + PersistentPartition* pp = static_cast(childPartitions[j]); + if (fake || !coalescedSerials_.contains(pp->getSerial())) { + partitions.append(PersistentPartitionGuard(pp)); + } + } + } else if (!isAppend && pperiod.getLength() <= length) { + partitions.append(PersistentPartitionGuard(partition)); + } + for (uint32_t j = 0; j < partitions.length(); ++j) { + const PersistentPartitionGuard& p = partitions[j]; + const CoalescingInfo info = p->getCoalescingInfo(); + CoalescablePartitions key(info); + CoalescablePartitions* coalescablePartitions = candidates.find(key); + if (coalescablePartitions == 0) { + coalescablePartitions = candidates.insertAndReturn(key); + } + PersistentPartitions& ppartitions = coalescablePartitions->getPartitions(); + ppartitions.insert(p); + } + } + } + + // Post-processing: coalesce a single partition only if it resides on the coalescing file system. + CoalescingCandidatesIterator iterator(candidates); + while (++iterator) { + const CoalescablePartitions& cp = iterator.key(); + const PersistentPartitions& partitions = cp.getPartitions(); + if (partitions.length() == 1 && partitions.first()->getFilesystem() < COALESCING_FILESYSTEM) { + candidates.remove(cp); + iterator.reset(); + } + } +} + +// Triggers partition coalescing, if there are candidate partitions. +void Master::coalesce() { + SPARROW_ENTER("Master::coalesce"); + if (!sparrow_coalescing || CoalescingControlTaskPerDB::isDisabled(database_)) { + return; + } + WriteGuard guard(getLock()); + if (hasBuiltInTimestampIndex()) { + return; + } + const uint64_t coalescingPeriod = getCoalescingPeriod(); + if (coalescingPeriod == 0) { + return; + } + const uint64_t newest = getNewest(true); + if (newest == 0) { + return; + } + const uint64_t oldest = getOldest(true); + const uint64_t limit = std::max(coalescingTimestamp_, oldest - (oldest % coalescingPeriod)); + if (limit == 0) { + return; + } +#ifndef NDEBUG + const Str soldest(Str::fromTimestamp(oldest)); + const Str snewest(Str::fromTimestamp(newest)); + const Str stimestamp = coalescingTimestamp_ == 0 ? 
Str("N/A") : Str::fromTimestamp(coalescingTimestamp_); + const Str slimit(Str::fromTimestamp(limit)); + DBUG_PRINT("sparrow_coalescing", ("Coalescing for %s.%s: oldest = %s, newest = %s, coalescing timestamp = %s, limit = %s, coalescedSerials_ contains %u", getDatabase().c_str(), getTable().c_str(), + soldest.c_str(), snewest.c_str(), stimestamp.c_str(), slimit.c_str(), coalescedSerials_.entries())); +#endif + uint64_t t = newest - (newest % coalescingPeriod); + uint64_t coalescingTimestamp = 0; + while (t >= limit) { + const uint64_t low = t - coalescingPeriod; + const TimePeriod period(&low, &t, true, false); + CoalescingCandidates candidates(16); + getCoalescingCandidates(period, candidates, false); + if (!candidates.isEmpty()) { +#ifndef NDEBUG + const Str speriod(Str::fromTimePeriod(period)); + DBUG_PRINT("sparrow_coalescing", ("Coalescing %u partition sets from %s.%s for period %s", candidates.entries(), + getDatabase().c_str(), getTable().c_str(), speriod.c_str())); +#endif + const Indexes& indexes = getIndexes(); + IndexIds indexIds(indexes.length()); + for (uint32_t i = 0; i < indexes.length(); ++i) { + if (!indexes[i].isDropped()) { + indexIds.append(i); + } + } + CoalescingCandidatesIterator iterator(candidates); + bool sent = false; + while (++iterator) { + CoalescablePartitions& cp = iterator.key(); + PersistentPartitions& partitions = cp.getPartitions(); + const uint32_t n = partitions.length(); + for (uint32_t i = 0; i < n; ++i) { + coalescedSerials_.insert(partitions[i]->getSerial()); + } + const CoalescingInfo& info = cp.getInfo(); + const uint32_t version = info.getFirst().getFirst(); + if (version < PersistentPartition::appendVersion_) { + // Version < PersistentPartition::appendVersion_: + // Coalesce data and index files. + DBUG_PRINT("sparrow_coalescing", ("Coalescing %u partitions from %s.%s (version=%u, column alter serial=%u)", partitions.entries(), + getDatabase().c_str(), getTable().c_str(), version, info.getFirst().getSecond())); +#ifndef NDEBUG + for (uint32_t i = 0; i < n; ++i) { + const PersistentPartitionGuard& partition = partitions[i]; + const Str speriod(Str::fromTimePeriod(partition->getPeriod())); + DBUG_PRINT("sparrow_coalescing", ("Partition %s.%s.%llu: %s (filesystem %u)", getDatabase().c_str(), getTable().c_str(), + static_cast(partition->getSerial()), speriod.c_str(), partition->getFilesystem())); + } +#endif + Scheduler::addTask(new CoalescingMainTask(this, partitions, indexIds)); + sent = true; + } else { + // Version >= PersistentPartition::appendVersion_: + // The data file is already coalesced (data are appended to it). + // Coalesce only index files. 
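+            // The child partitions handled below all share the main partition's data file, so we
+            // only merge their index files: compute the covering time period and total record
+            // count, allocate a new persistent partition describing the merged indexes, and hand
+            // the work to Coalescing::triggerIndexCoalescing().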
+            DBUG_PRINT("sparrow_coalescing", ("Coalescing %u partitions from %s.%s (version=%u, column alter serial=%u, data serial=%llu)",
+              partitions.entries(), getDatabase().c_str(), getTable().c_str(), version, info.getFirst().getSecond(), static_cast<unsigned long long>(info.getSecond())));
+            PersistentPartition& firstPartition = *partitions[0];
+            assert(firstPartition.getVersion() >= PersistentPartition::appendVersion_);
+            uint64_t minTimestamp = ULLONG_MAX;
+            uint64_t maxTimestamp = 0;
+            uint32_t records = 0;
+            for (uint32_t i = 0; i < n; ++i) {
+              const PersistentPartitionGuard& partition = partitions[i];
+#ifndef NDEBUG
+              const Str speriod(Str::fromTimePeriod(partition->getPeriod()));
+              DBUG_PRINT("sparrow_coalescing", ("Partition %s.%s.%llu: %s (filesystem %u)", getDatabase().c_str(), getTable().c_str(), static_cast<unsigned long long>(partition->getSerial()),
+                speriod.c_str(), partition->getFilesystem()));
+#endif
+              assert(!partition->isMain());
+              records += partition->getRecords();
+              const TimePeriod& period = partition->getPeriod();
+              minTimestamp = std::min(minTimestamp, period.getMin());
+              maxTimestamp = std::max(maxTimestamp, period.getMax());
+            }
+            PersistentPartition* coalescedPartition = newPersistentPartition(firstPartition.getVersion(), firstPartition.getDataSerial(),
+              FileUtil::chooseFilesystem(false), TimePeriod(minTimestamp, maxTimestamp), records, firstPartition.getIndexAlterSerial(),
+              firstPartition.getColumnAlterSerial(), 0, 0, firstPartition.getSkippedColumns());
+            Coalescing::triggerIndexCoalescing(this, partitions, partitions, coalescedPartition, indexIds);
+            sent = true;
+          }
+        }
+        if (sent) {
+          coalescingTimestamp = 0;
+          break;
+        }
+      }
+      t -= coalescingPeriod;
+      coalescingTimestamp = std::max(t, coalescingTimestamp);
+    }
+    if (coalescingTimestamp != 0) {
+      coalescingTimestamp_ = coalescingTimestamp;
+    }
+}
+
+void Master::registerCoalescingTask(CoalescingIndexTask* task) {
+  indexCoalescingTask_.append(task);
+}
+
+void Master::registerCoalescingTask(CoalescingMainTask* task) {
+  mainCoalescingTask_.append(task);
+}
+
+void Master::unregisterCoalescingTask(CoalescingIndexTask* task) {
+  [[maybe_unused]] bool found = indexCoalescingTask_.remove(task);
+  assert(found == true);
+}
+
+void Master::unregisterCoalescingTask(CoalescingMainTask* task) {
+  [[maybe_unused]] bool found = mainCoalescingTask_.remove(task);
+  assert(found == true);
+}
+
+void Master::stopCoalescingTasks() {
+  ReadGuard guard(getLock());
+  for (uint i = 0; i < indexCoalescingTask_.entries(); i++) {
+    indexCoalescingTask_[i]->stop();
+  }
+  for (uint i = 0; i < mainCoalescingTask_.entries(); i++) {
+    mainCoalescingTask_[i]->stop();
+  }
+}
+
+
+void Master::coalescingFailed(const PersistentPartitions& partitions) _THROW_(SparrowException)
+{
+  SPARROW_ENTER("Master::coalescingFailed");
+  {
+    WriteGuard guard(getLock());
+    for (uint32_t i = 0; i < partitions.length(); ++i) {
+      coalescedSerials_.remove(partitions[i]->getSerial());
+    }
+  }
+}
+
+void Master::coalescingDone(PersistentPartition* coalescedPartition, const PersistentPartitions& partitions) _THROW_(SparrowException) {
+  SPARROW_ENTER("Master::coalescingDone");
+  bool temporary = false;
+  bool alter = false;
+  do
+  {
+    {
+      WriteGuard guard(getLock());
+
+      // First check there are no pending references to the partitions we are going to remove.
+      // The Coalescing task holds 2 references to each partition. There is another one in Master::partitions_.
+      // So if a partition has a reference count greater than 3 some other module is still using that partition
+      // and we have to wait.
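+      // For example, with an idle table the count is exactly 3 (2 task references + 1 in
+      // Master::partitions_), so removal can proceed at once; a concurrent reader still holding
+      // a guard on one of these partitions pushes the count above 3 and we retry after the sleep below.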
+ bool can_remove = true; + for (uint32_t i = 0; i < partitions.length(); ++i) { + if ( partitions[i]->refs() > 3 ) { + can_remove = false; + break; + } + } + + if ( can_remove ) + { + for (uint32_t i = 0; i < partitions.length(); ++i) { + coalescedSerials_.remove( partitions[i]->getSerial() ); + } + temporary = removePartitions(partitions); + if (!temporary) { + if (coalescedPartition->getRecords() > 0) { + if (getIndexAlterSerial() > coalescedPartition->getIndexAlterSerial()) { + alter = true; + } + setDataSize(getDataSize() + coalescedPartition->getDataSize()); + setIndexSize(getIndexSize() + coalescedPartition->getIndexSize()); + setRecords(getRecords() + coalescedPartition->getRecords()); + coalescedPartition->acquireRef(); + assert(!partitions_.contains(coalescedPartition)); + partitions_.insert(coalescedPartition); + intervals_.insert(coalescedPartition); + if (!coalescedPartition->isMain()) { + coalescedPartition->getMainPartition()->addChildPartition(coalescedPartition); + } + } + } + toDisk(); + break; + } + } + my_sleep(100000); // 100ms + } while (true); + if (alter) { + DBUG_PRINT("sparrow_alter", ("Master::coalescingDone( %s.%s )", getDatabase().c_str(), getTable().c_str())); + startIndexAlter(false); + } + if (!temporary) { + // Try to coalesce older partitions, if any. + coalesce(); + } +} + +// Counts partitions smaller than the coalescing period, and returns the related percentage. +// Ignore partitions in the current (not completed) coalescing period. +// The return value is negative if there is nothing to coalesce and there is no coalesced partition. +double Master::getCoalescingPercentage() const { + SPARROW_ENTER("Master::getCoalescingPercentage"); + const uint64_t coalescingPeriod = getCoalescingPeriod(); + if (!sparrow_coalescing || coalescingPeriod == 0 || CoalescingControlTaskPerDB::isDisabled(database_)) { + return -1; + } + const uint64_t newest = getNewest(true); + if (newest == 0) { + return -1; + } + const uint64_t oldest = getOldest(true); + const uint64_t limit = oldest - (oldest % coalescingPeriod); + if ( limit == 0 ) { + return -1; + } + const uint64_t start = newest - (newest % coalescingPeriod); + uint32_t periods = 0; + uint32_t alreadyCoalescedPeriods = 0; + uint64_t t = start; + while (t >= limit) { + const uint64_t low = t - coalescingPeriod; + const TimePeriod period(&low, &t, true, false); + ++periods; + CoalescingCandidates candidates(16); + getCoalescingCandidates(period, candidates, true); + if (candidates.isEmpty()) { + ++alreadyCoalescedPeriods; + } + t -= coalescingPeriod; + } +#ifndef NDEBUG + const Str speriod(Str::fromTimePeriod(TimePeriod(limit - coalescingPeriod, start))); + DBUG_PRINT("sparrow_coalescing", ("Table %s.%s: %u coalesced periods vs %u total for %s", getDatabase().c_str(), getTable().c_str(), alreadyCoalescedPeriods, periods, speriod.c_str())); +#endif + return periods == 0 ? 
-1 : (100.0 * alreadyCoalescedPeriods) / periods; +} + +uint32_t Master::getTreeNodeSize(const uint32_t index) const { + uint32_t bits = 0; + uint32_t size = 0; + { + ReadGuard guard(getLock()); + const ColumnIds& columnIds = indexes_[index].getColumnIds(); + for (uint32_t i = 0; i < columnIds.length(); ++i) { + const Column& column = columns_[columnIds[i]]; + size += column.getDataSize(); + bits += column.getBits(); + } + } + size += (bits + 7) / 8; + size += 8; // TODO row size + return size; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterRepairTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void MasterRepairTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + Master* master = get(); + if (master != 0) { + master->repair(); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterId +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// To find a master file using its unique id. +volatile uint32_t MasterId::counter_; +SYShash MasterId::idHash_(16); +RWLock MasterId::idLock_(true, "MasterId::idLock_"); + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RecordWrapper +////////////////////////////////////////////////////////////////////////////////////////////////////// + +RecordWrapper::RecordWrapper(const TableFields& fields, const ColumnIds* columnIds, const bool tree) _THROW_(SparrowException) + : fields_(columnIds == 0 ? 0 : columnIds->length()) { + initialize(fields, columnIds, tree); +} + +RecordWrapper::RecordWrapper(TableFields& fields, const ColumnIds* columnIds, const bool tree, const bool removeFromFields) _THROW_(SparrowException) + : fields_(columnIds == 0 ? 0 : columnIds->length()) { + initialize(fields, columnIds, tree); + if (removeFromFields) { + if (columnIds == 0) { + // Data file: all columns. + fields.clear(); + } else { + // Index file. + for (uint32_t i = 0; i < columnIds->length(); ++i) { + fields[(*columnIds)[i]] = NULL; + } + for (int i = columnIds->length()-1; i >= 0; --i) { + if (fields[i] == NULL) { + fields.removeAt(i); + } + } + } + } +} + +void RecordWrapper::initialize(const TableFields& fields, const ColumnIds* columnIds, const bool tree) _THROW_(SparrowException) { + if (columnIds == 0) { + // Data file: all columns. + fields_ = fields; + } else { + // Index file. 
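+    // Keep only the fields referenced by the index, in the order given by columnIds.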
+ for (uint32_t i = 0; i < columnIds->length(); ++i) { + fields_.append(fields[(*columnIds)[i]]); + } + } + bits_ = 0; + size_ = 0; + for (uint32_t i = 0; i < fields_.length(); ++i) { + const FieldBase* field = fields_[i]; + if (field != 0) { + size_ += field->getSize(); + bits_ += field->getBits(); + } + } + bitSize_ = (bits_ + 7) / 8; + if (bitSize_ > SPARROW_MAX_BIT_SIZE) { + throw SparrowException::create(false, "Too many bits for record wrapper"); + } + size_ += bitSize_; + if (tree) { + size_ += 8; // TODO row size + } else if (columnIds != 0) { + size_ += 4; // TODO row size + } +} + +RecordWrapper::RecordWrapper(const TableFields& fields, const ColumnIds& skippedColumnIds) _THROW_(SparrowException) { + + for (uint32_t i = 0; i < fields.length(); ++i) { + uint32_t j = 0; + for (; jgetSize(); + bits_ += field->getBits(); + } + } + bitSize_ = (bits_ + 7) / 8; + if (bitSize_ > SPARROW_MAX_BIT_SIZE) { + throw SparrowException::create(false, "Too many bits for record wrapper"); + } + size_ += bitSize_; +} + +// key_part_map indicates which columns from the index to send back. For example, if the RecordWrapper was based on index_N +// which indexed columns A, B, C, and the key_part_map equals 3 (in binary 011) that indicates we must only send back values for columns A and B. +void RecordWrapper::readUsingKeyPartMap(PartitionReader& reader, PartitionReader& stringReader, const key_part_map map, + uint8_t* buffer, const bool keyFormat) const _THROW_(SparrowException) { + uint8_t bitArray[SPARROW_MAX_BIT_SIZE]; + readBits(reader, bitArray); + uint32_t bitOffset = 0; + uint32_t f = 0; + for (uint32_t i = 0; i < fields_.length(); ++i) { + const FieldBase* field = fields_[i]; + if (field == 0) { + continue; + } + const uint32_t nbits = field->getBits(); + const bool mapped = field->isMapped(); + if (mapped && (map & (1 << f)) == 0) { + ++f; + field->skip(reader); + bitOffset += nbits; + continue; + } + if (mapped) { + ++f; + } + uint32_t bits = 0; + for (uint32_t b = 0; b < nbits; ++b, ++bitOffset) { + bits |= ((bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + field->readPersistent(reader, stringReader, bits, buffer, keyFormat); + if (keyFormat) { + buffer += (field->isNullable() ? 1 : 0) + field->getLength(true); + } + } +} + +void RecordWrapper::readUsingTableBitmap(TABLE& table, PartitionReader& reader, PartitionReader& stringReader, const bool all, + uint8_t* buffer, const bool keyFormat) const _THROW_(SparrowException) { + uint8_t bitArray[SPARROW_MAX_BIT_SIZE]; + readBits(reader, bitArray); + uint32_t bitOffset = 0; + uint32_t f = 0; + + // In case of update, need to read all fields. + const bool forUpdate = !bitmap_is_clear_all(table.write_set); + for (uint32_t i = 0; i < fields_.length(); ++i) { + const FieldBase* field = fields_[i]; + if (field == 0) { + continue; + } + const uint32_t nbits = field->getBits(); + const bool mapped = field->isMapped(); + if (mapped && !all && !forUpdate && !bitmap_is_set(table.read_set, f)) { + ++f; + field->skip(reader); + bitOffset += nbits; + continue; + } + if (mapped) { + ++f; + } + uint8_t bits = 0; + for (uint32_t b = 0; b < nbits; ++b, ++bitOffset) { + bits |= ((bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + field->readPersistent(reader, stringReader, bits, buffer, keyFormat); + if (keyFormat) { + buffer += (field->isNullable() ? 
1 : 0) + field->getLength(true); + } + } +} + +void RecordWrapper::readKeyValue(PartitionReader& reader, PartitionReader& stringReader, ByteBuffer& buffer, BinBuffer* binBuffer) const _THROW_(SparrowException) { + uint8_t bitArray[SPARROW_MAX_BIT_SIZE]; + readBits(reader, bitArray); + buffer << ByteBuffer(bitArray, getBitSize()); + int bitOffset = 0; + for (uint32_t i = 0; i < fields_.length(); ++i) { + const FieldBase* field = fields_[i]; + if (field == 0) { + continue; + } + const int nbits = field->getBits(); + uint8_t bits = 0; + for (int b = 0; b < nbits; ++b, ++bitOffset) { + bits |= ((bitArray[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + field->copy(reader, stringReader, bits, buffer, binBuffer); + } +} + +int RecordWrapper::compare(ByteBuffer& buffer1, PartitionReader& stringReader1, ByteBuffer& buffer2, + PartitionReader& stringReader2, BinBuffer* binBuffer) const _THROW_(SparrowException) { + assert(&stringReader1 != &stringReader2); + uint8_t bitArray1[SPARROW_MAX_BIT_SIZE]; + readBits(buffer1, bitArray1); + uint8_t bitArray2[SPARROW_MAX_BIT_SIZE]; + readBits(buffer2, bitArray2); + int bitOffset = 0; + for (uint32_t i = 0; i < fields_.length(); ++i) { + const FieldBase* field = fields_[i]; + if (field == 0) { + continue; + } + const int nbits = field->getBits(); + uint8_t bits1 = 0; + uint8_t bits2 = 0; + for (int b = 0; b < nbits; ++b, ++bitOffset) { + bits1 |= ((bitArray1[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + bits2 |= ((bitArray2[bitOffset / 8] & (1 << (bitOffset % 8))) == 0 ? 0 : 1) << b; + } + const int cmp = field->compare(buffer1, bits1, stringReader1, buffer2, bits2, stringReader2, binBuffer); + if (cmp != 0) { + return cmp; + } + } + return 0; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DataFileReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +DataFileReader::DataFileReader(const TableFields& fields, const ColumnIds& columnIds, const ColumnIds& skippedColumns) + : fields_(fields), infos_(columnIds.length()), recordWrapper_(fields, skippedColumns) { + for (uint32_t i = 0; i < columnIds.length(); ++i) { + infos_.append(ColumnInfo()); + } + uint32_t bitOffset = 0; + uint32_t offset = 0; + for (uint32_t i = 0; i < fields.length(); ++i) { + const FieldBase* field = fields[i]; + if (field == 0) { + continue; + } + const uint32_t size = field->getSize(); + const uint32_t nbits = field->getBits(); + for (uint32_t j = 0; j < columnIds.length(); ++j) { + if (i == columnIds[j]) { + infos_[j] = ColumnInfo(i, bitOffset, nbits, offset, size); + break; + } + } + bitOffset += nbits; + offset += size; + } +} + +} diff --git a/storage/sparrow/engine/master.h b/storage/sparrow/engine/master.h new file mode 100644 index 000000000000..2790a43b03a6 --- /dev/null +++ b/storage/sparrow/engine/master.h @@ -0,0 +1,868 @@ +/* + Master file. 
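+  In-memory descriptor of one Sparrow table: its columns, indexes, foreign keys and
+  partitions, plus the bookkeeping used for flushing, purging, coalescing and online
+  index alterations.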
+*/ + +#ifndef _engine_master_h_ +#define _engine_master_h_ + +#include "types.h" +#include "scheduler.h" +#include "context.h" +#include "list.h" +#include "hash.h" +#include "intervaltree.h" +#include "../dns/dnsconfiguration.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Master +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TransientPartition; +class CoalescingIndexTask; +class CoalescingMainTask; +typedef SYSvector Names; +typedef RefPtr TransientPartitionGuard; +typedef SYSvector TransientPartitions; +class PersistentPartition; +typedef RefPtr PersistentPartitionGuard; +typedef SYSvector PersistentPartitions; +typedef SYSvector IndexMappings; +typedef SYSsortedVector Serials; +typedef SYSpVector, 256> Intervals; +class MasterDependency; +typedef SYSidlist MasterDependencies; +typedef Pair, uint64_t> CoalescingInfo; +class CoalescablePartitions; +typedef SYShash CoalescingCandidates; +typedef SYShashIterator CoalescingCandidatesIterator; +typedef SYSvector PartitionIds; +typedef SYSvector IndexCoalescingTasks; +typedef SYSvector MainCoalescingTasks; + +#define SAME_AS_SERIAL ULLONG_MAX +class Master : public RefCounted { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Master& master); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Master& master); + +private: + + uint32_t id_; // Unique transient identifier, for block cache handling. + + RWLock* lock_; // R/W lock protecting this master. + + Lock* updateLock_; // Lock and conditions to make sure data update does not occur during + Cond* canUpdate_; // transient partition flush or coalescing of data files (old format). + Cond* updateOnGoing_; + volatile bool updating_; // If true, an SQL update is on going. + volatile uint32_t updateBlockers_; // Number of operations currently preventing SQL update. + + Lock* flushLock_; // Lock and condition to serialize flushs to a given main partition. + Cond* canFlush_; + Serials flushSerials_; + + Str database_; + Str table_; + + Columns columns_; + ColumnIds mappedColumnIds_; + Indexes indexes_; + IndexMappings indexMappings_; // Mapping between MySQL index id and our index id. + ForeignKeys foreignKeys_; + + DnsConfigurationGuard dnsConfiguration_; + + uint64_t maxLifetime_; + uint32_t aggregationPeriod_; + + int64_t autoInc_; + + volatile uint64_t serial_; // Current partition serial number. + + uint64_t timeCreated_; // Seconds since epoch. + uint64_t timeUpdated_; // Seconds since epoch. + + uint64_t dataSize_; + uint64_t indexSize_; + uint64_t records_; + + // Partitions. + Partitions partitions_; + IntervalTree intervals_; + SYSpVector transientPartitions_; + + // Online index modifications. + uint32_t indexAlterSerial_; + Alterations indexAlterations_; + uint64_t indexAlterStarted_; // Milliseconds since epoch. + uint64_t indexAlterElapsed_; // Elapsed time doing alterations. + + uint64_t coalescingPeriod_; + uint64_t defaultWhere_; + + uint64_t stringOptimization_; // String optimization size. + + // Serial numbers of partitions being coalesced. + Serials coalescedSerials_; + + // List of coalescing tasks (either pending or being processed) + IndexCoalescingTasks indexCoalescingTask_; + MainCoalescingTasks mainCoalescingTask_; + + // All data before this timestamp are already coalesced. + uint64_t coalescingTimestamp_; + + // Version for serialization. 
+ const uint32_t version_; + + // Dependencies to remove upon deletion. + Lock* depLock_; + Cond* depCond_; + MasterDependencies dependencies_; + + static thread_local TABLE_SHARE* threadKey_; + +public: + + static const uint32_t currentVersion_; + +private: + + void logPartitions() const; + + void listPartitionsForMain(const PersistentPartition& mainPartition, PersistentPartitions& partitions) const _THROW_(SparrowException); + + bool removePartitions(const PersistentPartitions& partitions); + + void getCoalescingCandidates(const TimePeriod&, CoalescingCandidates& candidates, const bool fake) const; + + static bool formatFileName(char* to, const char* name, const char* dir, const char *extension); + +public: + + static void initialize(); + + Master(const char* database, const char* table, const bool key); + + void setup(const bool full); + + void prepareForDeletion(); + + ~Master(); + + void closeFiles(); + + void rename(const char* newDatabase, const char* newTable) _THROW_(SparrowException); + + bool isKey() const { + return !database_.isOwned(); + } + + uint32_t getId() const { + return id_; + } + + RWLock& getLock() const { return *lock_; } + + void startUpdate() { + Guard guard(*updateLock_); + while (updateBlockers_ > 0) { + canUpdate_->wait(true); + } + } + + void endUpdate() { + Guard guard(*updateLock_); + updating_ = false; + updateOnGoing_->signalAll(true); + } + + void blockUpdate() { + Guard guard(*updateLock_); + while (updating_) { + updateOnGoing_->wait(true); + } + updateBlockers_++; + } + + void allowUpdate() { + Guard guard(*updateLock_); + if (--updateBlockers_ == 0) { + canUpdate_->signalAll(true); + } + } + + // Check if a job is already flushing data to that main partition (identified by its serial number) + // If there is, wait until it has finished. + void startFlush(const uint64_t serial) { + Guard guard(*flushLock_); + while (flushSerials_.contains(serial)) { + canFlush_->wait(true); + } + flushSerials_.insert(serial); + } + + bool isFlushing(const uint64_t serial) { + Guard guard(*flushLock_); + return flushSerials_.contains(serial); + } + + void endFlush(const uint64_t serial) { + Guard guard(*flushLock_); + assert(flushSerials_.contains(serial)); + flushSerials_.remove(serial); + canFlush_->signalAll(true); + } + + Lock& getDepLock() const { return *depLock_; } + + static const char* getMasterFileName(char* buffer, const char* database, const char* table, + const bool appendExtension) _THROW_(SparrowException); + + const char* getMasterFileName(char* buffer) const _THROW_(SparrowException); + + const char* getDataDirectory(const uint32_t filesystem, char* buffer) const _THROW_(SparrowException); + + const char* getFileName(const uint32_t version, const uint32_t filesystem, const TimePeriod& period, + const uint32_t fileId, const uint64_t serial, const uint64_t dataSerial, char* buffer) const _THROW_(SparrowException); + + static bool checkForCorruption(struct tm& t); + + void toDisk() _THROW_(SparrowException); + + static Master* fromDisk(const char* database, const char* table, TABLE_SHARE* s) _THROW_(SparrowException); + + // Caution: MySQL changes db and table names to lower case, so use case-insensitive comparison + // (note Str::hash is already case-insensitive). 
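+  // For example, a table created as "MyDB"."MyTable" and later opened as "mydb"."mytable"
+  // must compare equal here and hash to the same bucket.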
+ bool operator == (const Master& right) const { + return database_.compareTo(right.database_, true) == 0 + && table_.compareTo(right.table_, true) == 0; + } + + bool operator < (const Master& right) const { + if (database_.compareTo(right.database_, true) < 0) { + return true; + } else if (right.database_.compareTo(database_, true) < 0) { + return false; + } + return table_.compareTo(right.table_, true) < 0; + } + + PersistentPartitionGuard findMainPartition(uint64_t serial, TimePeriod period, uint32_t columnAlterSerial, uint32_t indexAlterSerial, const ColumnIds& emptyColumnIds); + + void getPartitionsForTimePeriod(const TimePeriod& period, ReferencedPartitions& entries, QueryInfo& queryInfo) const; + + void coalesce(); + + void registerCoalescingTask(CoalescingIndexTask* task); + void registerCoalescingTask(CoalescingMainTask* task); + void unregisterCoalescingTask(CoalescingIndexTask* task); + void unregisterCoalescingTask(CoalescingMainTask* task); + uint getNbCoalescingTasks() { + ReadGuard guard(getLock()); + return indexCoalescingTask_.entries() + mainCoalescingTask_.entries(); + } + + void stopCoalescingTasks(); + + void coalescingDone(PersistentPartition* coalescedPartition, const PersistentPartitions& partitions) _THROW_(SparrowException); + + void coalescingFailed(const PersistentPartitions& partitions) _THROW_(SparrowException); + + TransientPartitionGuard getTransientPartition(const uint64_t& timestamp); + + void removePartitions(const TimePeriod& period) _THROW_(SparrowException); + + bool forceFlush(); + + bool forceFlushNoLock(const TransientPartitions&, bool master_lock_taken=false); + + void waitForFlush(); + void waitForFlush(const PartitionIds&); + + const Str& getDatabase() const { + return database_; + } + + const Str& getTable() const { + return table_; + } + + const Columns& getColumns() const { + return columns_; + } + + Columns& getColumns() { + return columns_; + } + + bool compareColumns(const ColumnExs& columns) const { + uint32_t j = 0; + for (uint32_t i = 0; i < columns_.length(); ++i) { + const Column& column = columns_[i]; + if (column.isDropped()) { + continue; + } + if (j >= columns.length() || column != columns[j]) { + return false; + } + ++j; + } + return true; + } + + void computeMappedColumnIds() { + const uint32_t nbColumns = columns_.length(); + mappedColumnIds_ = ColumnIds(nbColumns); + uint32_t pos = 0; + for (uint32_t i = 0; i < nbColumns; ++i) { + if (columns_[i].isDropped()) { + mappedColumnIds_.append(SYS_NPOS); + } else { + mappedColumnIds_.append(pos++); + } + } + } + + void setColumns(const Columns& columns) { + columns_ = columns; + computeMappedColumnIds(); + } + + void updateColumns(const ColumnExs& columns) { + uint32_t j = 0; + for (uint32_t i = 0; i < columns_.length(); ++i) { + Column& column = columns_[i]; + if (column.isDropped()) { + continue; + } + const uint32_t save = column.getSerial(); + column = columns[j++]; + column.setSerial(save); + } + } + + uint32_t getColumn(const Str& name) const { + for (uint32_t i = 0; i < columns_.length(); ++i) { + const Column& column = columns_[i]; + if (column.isDropped()) { + continue; + } + if (column.getName().compareTo(name, true) == 0) { + return i; + } + } + return SYS_NPOS; + } + + uint32_t getColumn(int colPos) const { + int pos = -1; + for (uint32_t i = 0; i < columns_.length(); ++i) { + const Column& column = columns_[i]; + if (column.isDropped()) { + continue; + } + if (++pos == colPos) { + return i; + } + } + return SYS_NPOS; + } + + void getColumnIds(const Names& colNames, 
ColumnIds& colIds) const _THROW_(SparrowException); + + void shiftColumnIds(ColumnIds& ids) const { + for (uint32_t i = 0; i < ids.length(); ++i) { + ids[i] = mappedColumnIds_[ids[i]]; + } + } + + ColumnIds updateColumnIds(const ColumnIds& ids, const ColumnExs& columns) { + ColumnIds result(ids.length()); + for (uint32_t i = 0; i < ids.length(); ++i) { + const Column& column = columns[ids[i]]; + const uint32_t id = getColumn(column.getName()); + assert(id != SYS_NPOS); + result.append(id); + } + return result; + } + + const Indexes& getIndexes() const { + return indexes_; + } + + void getFields(const uint32_t serial, const bool coalescing, TableFields& fields, const ColumnIds* skippedColumnIds) const { + FieldBase::createFields(serial, coalescing, 0, columns_, fields, skippedColumnIds); + } + + const ForeignKeys& getForeignKeys() const { + return foreignKeys_; + } + + const DnsConfiguration* getDnsConfiguration() const { + return dnsConfiguration_.get(); + } + + DnsConfiguration* getDnsConfiguration() { + return dnsConfiguration_.get(); + } + + uint64_t getMaxLifetime() const { + if (maxLifetime_ == 0) { + return static_cast(sparrow_default_max_lifetime) * static_cast(86400000); + } else { + return maxLifetime_; + } + } + + void setMaxLifetime(const uint64_t maxLifetime) { + SPARROW_ENTER("Master::setMaxLifetime"); + DBUG_PRINT("sparrow_master", ("Set max lifetime of table %s.%s to %llu milliseconds", + getDatabase().c_str(), getTable().c_str(), static_cast(maxLifetime))); + maxLifetime_ = maxLifetime; + } + + uint32_t getAggregationPeriod() const { + return aggregationPeriod_; + } + + void setAggregationPeriod(const uint32_t aggregationPeriod) { + aggregationPeriod_ = aggregationPeriod; + } + + int64_t getAutoInc() const { + return autoInc_; + } + + void setAutoInc(const int64_t autoInc) { + autoInc_ = autoInc; + } + + uint64_t getOldest(const bool persistentOnly = false) const; + + uint64_t getNewest(const bool persistentOnly = false) const; + + uint64_t getAge() const { + const uint64_t low = getOldest(); + const uint64_t high = getNewest(); + return (low != 0 && high != 0 && high > low) ? high - low : 0; + } + + uint64_t getAge(const uint64_t t) const { + const uint64_t low = getOldest(); + return (low != 0 && t != 0 && t > low) ? t - low : 0; + } + + void setIndexes(const Indexes& indexes) { + indexes_ = indexes; + } + + void setForeignKeys(const ForeignKeys& foreignKeys) { + foreignKeys_ = foreignKeys; + } + + TransientPartitions setDnsConfiguration(const DnsConfiguration& configuration); + + void deinitialize(); + + const IndexMappings& getIndexMappings() const { + return indexMappings_; + } + + void setIndexMappings(const IndexMappings& indexMappings) { + indexMappings_ = indexMappings; + uint32_t i = 0; + while (i < indexMappings_.length()) { + if (indexMappings_[i] == -1) { + indexMappings_.removeAt(i); + } else { + i++; + } + } + } + + int getIndexId(const uint32_t mySqlIndexId) const { + return mySqlIndexId == MAX_KEY ? 
DATA_FILE : indexMappings_[mySqlIndexId]; + } + + int getMySqlIndexId(const uint32_t indexId) const { + for (uint32_t i = 0; i < indexMappings_.length(); ++i) { + if (static_cast(indexMappings_[i]) == indexId) { + return static_cast(i); + } + } + return MAX_KEY; + } + + uint64_t getDataSize() const { + return dataSize_; + } + + uint64_t getIndexSize() const { + return indexSize_; + } + + uint64_t getRecords() const { + return records_; + } + + uint64_t getTransientRecords() const; + + void setDataSize(uint64_t dataSize) { + dataSize_ = dataSize; + } + + void setIndexSize(uint64_t indexSize) { + indexSize_ = indexSize; + } + + void setRecords(uint64_t records) { + records_ = records; + } + + const Partitions& getPartitions() const { + return partitions_; + } + + PartitionGuard getPartitionNoLock(const uint64_t serial) const; + + PartitionGuard getPartition(const uint64_t serial) const; + + void getTransientPartitions(TransientPartitions&) const; + + uint64_t getTimeCreated() const { + return timeCreated_; + } + + uint64_t getTimeUpdated() const { + return timeUpdated_; + } + + void retrieve(ByteBuffer& buffer) const; + + uint32_t hash() const { + uint32_t result = 1; + result = 31 + database_.hash(); + result = 31 * result + table_.hash(); + return result; + } + + uint64_t getNormalizedSize() const; + + bool needToPurge(const uint64_t limit, const uint64_t total, const uint64_t totalNormalized, bool& force, const bool mode) const; + + bool purge(PersistentPartitions& partitions, const bool force, const bool mode); + + uint64_t listPartitionsForFilesystem(const uint32_t filesystem, PersistentPartitions& partitions, + const uint64_t limit, const uint64_t totalNormalized) const; + + bool purgePartitionsForFilesystem(const PersistentPartitions& partitions); + + uint32_t getIndexAlterSerial() const { + return indexAlterSerial_; + } + + void setIndexAlterSerial(const uint32_t indexAlterSerial) { + indexAlterSerial_ = indexAlterSerial; + } + + const Alterations& getIndexAlterations() const { + return indexAlterations_; + } + + void setIndexAlterations(const Alterations& indexAlterations) { + indexAlterations_ = indexAlterations; + } + + bool startIndexAlter(const bool check) _THROW_(SparrowException); + + void indexAlterationDone(); + + bool getIndexAlterStatus(uint64_t& elapsed, uint64_t& left, double& percentage) const; + bool getIndexAlterStatus(SYSslist& strings) const; + + void dropColumn(const char* name) _THROW_(SparrowException); + void addColumn(const char* after, Column& newColumn) _THROW_(SparrowException); + void renameColumn(const char* from, const char* to) _THROW_(SparrowException); + + uint32_t getColumnAlterSerial() const { + uint32_t serial = 0; + for (uint32_t i = 0; i < columns_.length(); ++i) { + const Column& column = columns_[i]; + const uint32_t columnSerial = column.isDropped() ? 
column.getDropSerial() : column.getSerial(); + serial = std::max(serial, columnSerial); + } + return serial; + } + + PersistentPartition* newPersistentPartition(const uint32_t version, const uint64_t dataSerial, const uint32_t filesystem, + const TimePeriod& period, const uint32_t records, const uint32_t indexAlterSerial, const uint32_t columnAlterSerial, + const uint64_t dataRecords, const uint64_t recordOffset, const ColumnIds& emptyColumnIds); + + PersistentPartition* newTemporaryPersistentPartition(const PersistentPartition& partition, const uint32_t records, const uint64_t recordOffset); + + void mutatePartition(TransientPartition* transientPartition, PersistentPartition* mainPartition, PersistentPartition* newPartition); + + bool hasBuiltInTimestampIndex() const; + + uint32_t getTreeNodeSize(const uint32_t index) const; + + uint64_t getCoalescingPeriod() const { + return coalescingPeriod_; + } + + void setCoalescingPeriod(const uint64_t coalescingPeriod) { + coalescingPeriod_ = coalescingPeriod; + coalescingTimestamp_ = 0; + } + + void resetCoalescingTimestamp() { + coalescingTimestamp_ = 0; + } + + uint64_t getDefaultWhere() const { + return defaultWhere_; + } + + void setDefaultWhere(const uint64_t defaultWhere) { + defaultWhere_ = defaultWhere; + } + + uint64_t getStringOptimization() const { + return stringOptimization_ == 0 ? sparrow_default_string_optimization_size : stringOptimization_; + } + + void setStringOptimization(const uint64_t stringOptimization) { + stringOptimization_ = stringOptimization; + } + + double getCoalescingPercentage() const; + + void addDependency(MasterDependency* dependency) { + Guard guard(getDepLock()); + dependencies_.append(dependency); + } + + void removeDependency(MasterDependency* dependency) { + Guard guard(getDepLock()); + dependencies_.remove(dependency); + if (dependencies_.isEmpty()) { + depCond_->signalAll(true); + } + } + + void repair(); +}; + +ByteBuffer& operator >> (ByteBuffer& buffer, Master& master); +ByteBuffer& operator << (ByteBuffer& buffer, const Master& master); + +typedef RefPtr MasterGuard; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// UpdateGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class UpdateGuard { +private: + + Master& master_; + +public: + + UpdateGuard(Master& master) : master_(master) { + master_.blockUpdate(); + } + + ~UpdateGuard() { + master_.allowUpdate(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterDependency +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MasterDependency : public MasterGuard, public SYSidlink { +public: + + MasterDependency() { + } + + MasterDependency(Master* master) : MasterGuard(master) { + if (master != 0) { + master->addDependency(this); + } + } + + virtual ~MasterDependency() { + Master* master = get(); + if (master != 0) { + master->removeDependency(this); + } + } + + virtual void stop() = 0; + + MasterDependency& operator = (const MasterDependency& right) { + if (this != &right) { + Master* master = get(); + Master* rightMaster = right.get(); + if (master != rightMaster) { + if (master != 0) { + master->removeDependency(this); + } + if (rightMaster != 0) { + rightMaster->addDependency(this); + } + *static_cast(this) = rightMaster; + } + } + return *this; + } + + MasterDependency(const MasterDependency& right) : 
MasterGuard() { + *this = right; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterKeepAlive +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MasterKeepAlive : public MasterDependency { +private: + + volatile bool stopping_; + +public: + + MasterKeepAlive() : stopping_(false) { + } + + MasterKeepAlive(Master* master) : MasterDependency(master), stopping_(false) { + } + + ~MasterKeepAlive() { + } + + void stop() override { + stopping_ = true; + } + + volatile bool& isStopping() { + return stopping_; + } + + MasterKeepAlive& operator = (const MasterKeepAlive& right) { + if (this != &right) { + *static_cast(this) = right; + stopping_ = right.stopping_; + } + return *this; + } + + MasterKeepAlive(const MasterKeepAlive& right) : MasterDependency() { + *this = right; + } +}; + +typedef SYSvector Masters; +typedef SYSsortedVector SortedMasters; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MasterTask : public Task, protected MasterDependency { +public: + + MasterTask(Queue& queue, Master* master) : Task(queue), MasterDependency(master) { + } + + ~MasterTask() { + } + + void stop() override { + Task::stop(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterRepairTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MasterRepairTask : public MasterTask { +public: + MasterRepairTask(Master* master) : MasterTask(Worker::getQueue(), master) {;} + + bool operator == (const MasterRepairTask& right) const { + return this->get() == right.get(); + } + + bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { return 0; } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MasterId +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class MasterId { +private: + + static volatile uint32_t counter_; + static SYShash idHash_; + static RWLock idLock_; + + uint32_t id_; + Master* master_; + +public: + + MasterId(const uint32_t id) : id_(id), master_(0) { + } + + MasterId(Master* master) : id_(master->getId()), master_(master) { + } + + bool operator == (const MasterId& right) const { + return id_ == right.id_; + } + + uint32_t hash() const { + return 31 + static_cast(id_); + } + + static uint32_t newId() { + return Atomic::inc32(&MasterId::counter_); + } + + static void remove(Master* master) { + WriteGuard guard(idLock_); + idHash_.remove(MasterId(master)); + } + + static void insert(Master* master) { + WriteGuard guard(idLock_); + idHash_.insert(MasterId(master)); + } + + static MasterGuard get(const uint32_t id) { + ReadGuard guard(idLock_); + MasterId* masterId = idHash_.find(MasterId(id)); + return MasterGuard(masterId == 0 ? 
0 : masterId->master_); + } +}; + + +} + +#endif /* #ifndef _engine_master_h_ */ diff --git a/storage/sparrow/engine/misc.h b/storage/sparrow/engine/misc.h new file mode 100644 index 000000000000..5b7f27edd8b1 --- /dev/null +++ b/storage/sparrow/engine/misc.h @@ -0,0 +1,625 @@ +/* + Miscellaneous types. +*/ + +#ifndef _engine_misc_h_ +#define _engine_misc_h_ + +//#include +#include "sql/query_options.h" // For mysqld options. +#include "atomic.h" +#include "serial.h" +#include +#include "my_dbug.h" +#include "m_ctype.h" +#include "m_string.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CodeStateGuard and debug macros compatible with exceptions +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef NDEBUG +#define SPARROW_ENTER(a) +#define DBUG_LOCK +#else +class CodeStateGuard { +private: + + const char* file_; + const uint line_; + struct _db_stack_frame_ stackFrame_; + bool valid_; + +public: + + static Lock lock_; + +public: + + CodeStateGuard(const char* func, const char* file, const uint line) : file_(file), line_(line), valid_(true) { + _db_enter_(func, ::strlen(func), file, line, &stackFrame_); + } + + void reset() { + if (valid_) { + valid_ = false; + _db_return_(line_, &stackFrame_); + } + } + + ~CodeStateGuard() { + reset(); + } +}; +#define SPARROW_ENTER(a) \ + CodeStateGuard _csGuard(a, __FILE__, __LINE__); \ + do { \ + } while(0) + +// Macro to avoid overlapping log output. +#define DBUG_LOCK \ + Guard _debugGuard(CodeStateGuard::lock_); \ + do { \ + } while(0) +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// AutoPtr +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This class holds a pointer and automatically releases it when it goes +// out-of-scope. Similar to std::auto_ptr, without the evil owner bit. +template +class AutoPtr { +private: + + T* ptr_; // The pointer we encapsulate. + + AutoPtr(const AutoPtr&); + AutoPtr& operator=(const AutoPtr&); + + void reset() { + if (ptr_ != 0) { + if (!ARRAY) { + delete ptr_; + } + else { + delete [] ptr_; + } + ptr_ = 0; + } + } + +public: + // Construction / destruction + AutoPtr() : ptr_ (0) { + } + explicit AutoPtr(T* ptr) : ptr_ (ptr) { + assert(ptr_ != 0); + } + ~AutoPtr() { + reset(); + } + + // Pointer-like operators + // + // NOTE: While it may be very tempting to write an automatic conversion operator + // to T* here, it is usually considered an evil thing and may cause very subtle + // glitches that can make your life truly miserable, so it's better to use get(). + AutoPtr& operator=(T* ptr) { + reset(); + ptr_ = ptr; + return *this; + } + + bool operator==(const T* ptr) const { + return ptr_ == ptr; + } + bool operator!=(const T* ptr) const { + return ptr_ != ptr; + } + T& operator*() { + assert(ptr_ != 0); + return *ptr_; + } + T* operator->() { + assert(ptr_ != 0); + return ptr_; + } + T& operator[](const uint32_t index) { + assert(ptr_ != 0); + return ptr_[index]; + } + T* get() { + return ptr_; + } + + // + // Releases the pointer we're holding, meaning the the caller becomes + // responsible of it's deletion. The released pointer is returned. 
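+  // Illustrative usage (a minimal sketch; "Foo" is a placeholder type, not part of this code):
+  //   AutoPtr<Foo> guard(new Foo());  // deleted automatically when guard goes out of scope
+  //   Foo* raw = guard.release();     // the caller now owns raw and must delete it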
+ // + T* release() { + assert(ptr_ != 0); + T* ptr = ptr_; + ptr_ = 0; + return ptr; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefCounter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Atomic reference counter. +class RefCounter { +public: + + RefCounter(const uint32_t n = 0): refs_ (n) { ; } + + void acquire() { Atomic::inc32(&refs_); } + bool release() { return Atomic::dec32(&refs_) == 0; } + + uint32_t refs() const { return refs_; } + + // prefix + uint32_t operator ++ () { return Atomic::inc32(&refs_); } + uint32_t operator -- () { return Atomic::dec32(&refs_); } + + // postfix + uint32_t operator ++ (int) { return Atomic::inc32(&refs_) - 1; } + uint32_t operator -- (int) { return Atomic::dec32(&refs_) + 1; } + + // add/sub + RefCounter& operator += (const int v) { Atomic::add32(&refs_, v); return *this; } + RefCounter& operator -= (const int v) { Atomic::add32(&refs_, -v); return *this; } + + // reset/set + void reset(const uint32_t n = 0) { refs_ = n; } + RefCounter& operator = (const uint32_t n) { refs_ = n; return *this; } + +private: + + volatile uint32_t refs_; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefCounted +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Base class that provides a reference counting mechanism. +class RefCounted { +private: + + mutable RefCounter count_; // The current reference count. + +public: + RefCounted() : count_ (0) { + } + + // Copy constructor. Sets the count to 0. + RefCounted(const RefCounted&) : count_ (0) { + } + + // Destructor. Ensures nobody still holds a reference! + virtual ~RefCounted() { + assert(count_.refs () == 0); + } + + void acquireRef() { + ++count_; + } + + bool releaseRef() { + return --count_ == 0; + } + + void resetRef(uint32_t n) { + count_.reset(n); + } + + uint32_t refs() const { + return count_.refs(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RefPtr +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Smart pointer class that automatically takes and releases a reference on +// the object it holds. Usable with any kind of reference counted object. +// +// In order to be usable with this class, T must inherit from RefCounted. +template class RefPtr { +private: + + T* ptr_; // The pointer we hold. + + // Takes a reference if we're holding a pointer. + void connect() { + if (ptr_ != 0) { + ptr_->acquireRef(); + } + } + + // Releases the reference on the object we may hold. + void disconnect() { + if (ptr_ != 0 && ptr_->releaseRef()) { + delete ptr_; + } + } + +public: + + RefPtr() : ptr_ (0) { + } + + explicit RefPtr(T* ptr) : ptr_ (ptr) { + connect (); + } + + // Copy constructor. + RefPtr(const RefPtr& ptr) : ptr_ (ptr.ptr_) { + connect (); + } + + // Copy constructor from other refptr types + template RefPtr(const RefPtr& ptr) : ptr_ (ptr.get ()) { + connect (); + } + + // Destructor. + ~RefPtr() { + disconnect (); + } + + // Returns the pointer we encapsulate. 
+ T* get() const { + return ptr_; + } + T& operator*() const { + assert(ptr_ != 0); + return *ptr_; + } + T* operator->() const { + assert(ptr_ != 0); + return ptr_; + } + operator T*() const { + return ptr_; + } + + // Automatic conversion to other refptr types + template operator RefPtr() const { + return RefPtr(ptr_); + } + + // Assignment operator. + RefPtr& operator = (T* ptr) { + if (ptr_ == ptr) { + return *this; + } + disconnect (); + ptr_ = ptr; + connect (); + return *this; + } + + // Assignment operator. + RefPtr& operator = (const RefPtr& ptr) { + if (this == &ptr || ptr_ == ptr.ptr_) { + return *this; + } + disconnect (); + ptr_ = ptr.ptr_; + connect (); + return *this; + } + + // Comparison operators. + bool operator < (const RefPtr& ptr) const { + if (ptr_ == 0) { + return ptr.ptr_ != 0; + } + if (ptr.ptr_ == 0) { + return false; + } + if (ptr_ == ptr.ptr_) { + return false; + } + return *ptr_ < *ptr.ptr_; + } + bool operator == (const RefPtr& ptr) const { + if (ptr_ == 0) { + return ptr.ptr_ == 0; + } + if (ptr.ptr_ == 0) { + return false; + } + if (ptr_ == ptr.ptr_) { + return true; + } + return *ptr_ == *ptr.ptr_; + } + + void reset() { + ptr_ = 0; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Pair +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class Pair { +private: + + T t_; + V v_; + +public: + + Pair() { + } + + Pair(const T& t, const V& v) : t_(t), v_(v) { + } + + Pair& operator = (const Pair& right) { + if (this != &right) { + t_ = right.t_; + v_ = right.v_; + } + return *this; + } + + Pair(const Pair& right) { + *this = right; + } + + bool operator == (const Pair& right) const { + if (this != &right) { + return t_ == right.t_ && v_ == right.v_; + } else { + return true; + } + } + + bool operator < (const Pair& right) const { + if (t_ < right.t_) { + return true; + } else if (right.t_ < t_) { + return false; + } else { + return v_ < right.v_; + } + } + + const T& getFirst() const { + return t_; + } + + const V& getSecond() const { + return v_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Str +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Simple string class, not optimized for performance. 
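+// Illustrative usage (a minimal sketch):
+//   Str owned("customers");                     // owned copy (my_strdup)
+//   Str borrowed("customers", false);           // borrows the caller's buffer, no copy
+//   bool same = owned.compareTo(Str("CUSTOMERS"), true) == 0;  // case-insensitive compare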
+typedef Interval TimePeriod; +class Str { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Str& s); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Str& s); + +private: + + static const char* empty_; + const char* s_; + uint32_t owned_:1; + uint32_t length_:31; + + void clear() { + if (owned_) { + my_free(const_cast(s_)); + } + s_ = 0; + } + +public: + + Str() { + s_ = empty_; + owned_ = false; + length_ = 0; + } + + explicit Str(const char* s, bool owned = true) { + assert(s != 0); + if (owned) { + s_ = static_cast(my_strdup(PSI_INSTRUMENT_ME, s, MYF(MY_FAE))); + owned_ = true; + } else { + s_ = s; + owned_ = false; + } + length_ = static_cast(strlen(s_)); + } + + explicit Str(const char* s, int length) { + if (length == 0) { + s_ = empty_; + owned_ = false; + length_ = 0; + } else { + assert(s != 0); + s_ = static_cast(my_strndup(PSI_INSTRUMENT_ME, s, length, MYF(MY_FAE))); + owned_ = true; + length_ = static_cast(strlen(s_)); + } + } + + Str(const Str& s) { + if (s.s_ == empty_) { + s_ = empty_; + owned_ = false; + } else { + s_ = static_cast(my_strdup(PSI_INSTRUMENT_ME, s.s_, MYF(MY_FAE))); + owned_ = true; + } + length_ = static_cast(strlen(s_)); + } + + Str& operator = (const Str& s) { + if (this == &s) { + return *this; + } + clear(); + if (s.s_ == empty_) { + s_ = empty_; + owned_ = false; + } else { + s_ = static_cast(my_strdup(PSI_INSTRUMENT_ME, s.s_, MYF(MY_FAE))); + owned_ = true; + } + length_ = static_cast(strlen(s_)); + return *this; + } + + ~Str() { + clear(); + } + + int length() const { + return static_cast(length_); + } + + bool isEmpty() const { + return length() == 0; + } + + const char* c_str() const { + return s_; + } + + bool isOwned() const { + return owned_; + } + + int compareTo(const Str& s, const bool caseInsensitive) const { + if (caseInsensitive) { + return my_strcasecmp(system_charset_info, s_, s.s_); + } else { + return strcmp(s_, s.s_); + } + } + + bool startsWith(const Str& s, const bool caseInsensitive) const { + if (caseInsensitive) { + return native_strncasecmp(s_, s.s_, s.length()) == 0; + } else { + return strncmp(s_, s.s_, s.length()) == 0; + } + } + + bool operator == (const Str& s) const { + return length() == s.length() && compareTo(s, false) == 0; + } + + bool operator != (const Str& s) const { + return !(*this == s); + } + + bool operator < (const Str& s) const { + return compareTo(s, false) < 0; + } + + void toLower() { + if (owned_) { + for (int i = 0; i < length(); ++i) { + const_cast(s_)[i] = tolower(s_[i]); + } + } else { + Str s(*this); + s.toLower(); + *this = s; + } + } + + Str& operator += (const Str& s) { + if (s.length() == 0) { + return *this; + } else if (length() == 0) { + *this = s; + return *this; + } + int l = length(); + int sl = s.length(); + char* ns = static_cast(my_malloc(PSI_INSTRUMENT_ME, l + sl + 1, MYF(MY_WME))); + memcpy(ns, s_, l); + memcpy(ns + l, s.s_, sl + 1); + clear(); + s_ = ns; + owned_ = true; + length_ = static_cast(strlen(s_)); + return *this; + } + + uint32_t hash() const { + uint32_t h = 1; + int off = 0; + for ( ; ; ) { + // Hash is case insensitive. + uint8_t v = static_cast(tolower(s_[off++])); + if (v == 0) { + break; + } + h = 31 * h + v; + } + return h; + } + + // Timestamp is in milliseconds. + static Str fromTimestamp(const uint64_t timestamp); + + static Str fromTimePeriod(const TimePeriod& period); + + // Duration is in milliseconds. + static Str fromDuration(const uint64_t duration); + + // Size is in bytes. 
+ static Str fromSize(const uint64_t size); +}; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, Str& s) { + int length; + buffer >> length; + s.clear(); + s.s_ = static_cast(my_malloc(PSI_INSTRUMENT_ME, length + 1, MYF(MY_WME))); + s.owned_ = true; + s.length_ = length; + ByteBuffer contents(reinterpret_cast(s.s_), length); + buffer >> contents; + const_cast(s.s_)[length] = 0; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const Str& s) { + int length = s.length(); + buffer << length << ByteBuffer(reinterpret_cast(s.c_str()), length); + return buffer; +} + +inline static Str operator + (const Str& left, const Str& right) { + Str s = left; + s += right; + return s; +} + +} + +#endif /* #ifndef _engine_misc_h_ */ diff --git a/storage/sparrow/engine/partition.h b/storage/sparrow/engine/partition.h new file mode 100644 index 000000000000..ff6eb47c9303 --- /dev/null +++ b/storage/sparrow/engine/partition.h @@ -0,0 +1,447 @@ +/* + Generic partition. +*/ + +#ifndef _engine_partition_h_ +#define _engine_partition_h_ + +#include "types.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Position +////////////////////////////////////////////////////////////////////////////////////////////////////// + +using RowNumber = uint32_t; +#define INVALID_PARTITION UINT_MAX +#define INVALID_ROW UINT_MAX +#define INVALID_TREE_NODE UINT_MAX + +// Position in a partition file (data or index). +class Position { +private: + + uint32_t partition_; // Partition number. + RowNumber row_; // Row number in data file. + RowNumber indexHint_; // Index hint. + RowNumber startHint_; // Start index hint. + RowNumber endHint_; // End index hint. + uint32_t treeHint_; // Tree hint. 
+ +public: + + Position(const uint32_t partition = INVALID_PARTITION, const RowNumber row = INVALID_ROW, const RowNumber indexHint = INVALID_ROW, + const RowNumber startHint = INVALID_ROW, const RowNumber endHint = INVALID_ROW, const uint32_t treeHint = INVALID_TREE_NODE) + : partition_(partition), row_(row), indexHint_(indexHint), startHint_(startHint), endHint_(endHint), treeHint_(treeHint) { + } + + void clear() { + *this = Position(partition_); + } + + bool isValid() const { + return partition_ != INVALID_PARTITION && row_ != INVALID_ROW; + } + + bool hasIndexHint() const { + return indexHint_ != INVALID_ROW; + } + + bool hasIntervalHint() const { + return startHint_ != INVALID_ROW && endHint_ != INVALID_ROW; + } + + bool hasTreeHint() const { + return treeHint_ != INVALID_TREE_NODE; + } + + uint32_t getPartition() const { + return partition_; + } + + RowNumber getRow() const { + return row_; + } + + RowNumber getIndexHint() const { + return indexHint_; + } + + RowNumber getStartHint() const { + return startHint_; + } + + RowNumber getEndHint() const { + return endHint_; + } + + uint32_t getTreeHint() const { + return treeHint_; + } + + void setPartition(const uint32_t partition) { + partition_ = partition; + } + + void setRow(const RowNumber row) { + row_ = row; + } + + Str toString() const { + char buffer[256]; + buffer[0] = 0; + char* s = buffer + sprintf(buffer, "partition "); + if (partition_ == INVALID_PARTITION) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%u", partition_); + } + s += sprintf(s, ", row "); + if (row_ == INVALID_ROW) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%llu", static_cast(row_)); + } + s += sprintf(s, " (hints: index="); + if (indexHint_ == INVALID_ROW) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%llu", static_cast(indexHint_)); + } + s += sprintf(s, ", start="); + if (startHint_ == INVALID_ROW) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%llu", static_cast(startHint_)); + } + s += sprintf(s, ", end="); + if (endHint_ == INVALID_ROW) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%llu", static_cast(endHint_)); + } + s += sprintf(s, ", tree="); + if (treeHint_ == INVALID_TREE_NODE) { + s += sprintf(s, "N/A"); + } else { + s += sprintf(s, "%u", treeHint_); + } + s += sprintf(s, ")"); + return Str(buffer); + } +}; + +typedef SYSvector Positions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// KeyValue +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class KeyValue { +private: + + uint8_t* key_; + key_part_map map_; + +public: + + KeyValue(uint8_t* key = 0, const key_part_map map = 0) : key_(key), map_(map) { + } + explicit KeyValue(const key_range* range) : key_(const_cast(range->key)), map_(range->keypart_map) { + } + const uint8_t* getKey() const { + return key_; + } + uint8_t* getKey() { + return key_; + } + key_part_map getMap() const { + return map_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SearchFlag +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SearchFlag { +public: + + enum Type { + EQ, + GE, + LE, + LE_LAST, + GT, + LT + }; + +private: + + const enum Type type_; + +public: + + static Type getType(const enum ha_rkey_function findFlag) { + switch(findFlag) { + case HA_READ_KEY_EXACT: return EQ; + case HA_READ_KEY_OR_NEXT: return 
GE; + case HA_READ_KEY_OR_PREV: return LE; + case HA_READ_AFTER_KEY: return GT; + case HA_READ_BEFORE_KEY: return LT; + case HA_READ_PREFIX: return GE; + case HA_READ_PREFIX_LAST: return LE_LAST; + case HA_READ_PREFIX_LAST_OR_PREV: return LE_LAST; + default: assert(0); return EQ; + } + } + explicit SearchFlag(const enum ha_rkey_function findFlag) : type_(getType(findFlag)) { + } + SearchFlag(const Type type) : type_(type) { + } + bool operator == (const Type type) const { + return type_ == type; + } + operator Type() const { + return type_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Partition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Context; +class QueryInfo; +class Partition : public RefCounted { +protected: + + uint64_t serial_; + uint64_t dataSerial_; + uint32_t filesystem_; + uint32_t indexAlterSerial_; // Index alteration serial number; to be compared to Master::indexAlterSerial_. + uint32_t columnAlterSerial_; // Column alteration serial number; to be compared to Column::serial_. + +public: + + Partition(const uint64_t serial, const uint64_t dataSerial, const uint32_t filesystem, const uint32_t indexAlterSerial, + const uint32_t columnAlterSerial) + : serial_(serial), dataSerial_(dataSerial), filesystem_(filesystem), indexAlterSerial_(indexAlterSerial), + columnAlterSerial_(columnAlterSerial) { + } + + virtual ~Partition() { + } + + virtual void detach() = 0; + + // Attributes. + + uint64_t getSerial() const { + return serial_; + } + + uint64_t getDataSerial() const { + return dataSerial_; + } + + bool isMain() const { + return getSerial() == getDataSerial(); + } + + uint32_t getFilesystem() const { + return filesystem_; + } + + void setFilesystem(const uint32_t filesystem) { + filesystem_ = filesystem; + } + + uint32_t getIndexAlterSerial() const { + return indexAlterSerial_; + } + + void setIndexAlterSerial(const uint32_t indexAlterSerial) { + indexAlterSerial_ = indexAlterSerial; + } + + uint32_t getColumnAlterSerial() const { + return columnAlterSerial_; + } + + void setColumnAlterSerial(const uint32_t columnAlterSerial) { + columnAlterSerial_ = columnAlterSerial; + } + + virtual TimePeriod getPeriod() const = 0; + + virtual uint32_t getRecords() const = 0; + + virtual uint64_t getDataSize() const = 0; + + virtual uint64_t getIndexSize() const = 0; + + virtual bool isTransient() const = 0; + + virtual bool isReady() const = 0; + + virtual bool isIndexAlterable() const = 0; + + bool isTemporary() const { + return getPeriod().getMax() == 0; + } + + // Data access. 
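+  // Cursor-style navigation API: every method returns a Position, and a Position whose
+  // partition or row is INVALID_PARTITION/INVALID_ROW means "no (more) rows". The index*
+  // methods iterate in index (key) order, the move* methods in physical row order.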
+ + virtual Position indexFind(Context& context, const uint32_t partition, const KeyValue& key, const SearchFlag searchFlag) const = 0; + + virtual Position indexFirst(Context& context, const uint32_t partition) const = 0; + + virtual Position indexLast(Context& context, const uint32_t partition) const = 0; + + virtual Position indexNext(Context& context, const Position& position) const = 0; + + virtual Position indexPrevious(Context& context, const Position& position) const = 0; + + virtual Position moveNext(Context& context, const Position& position) const = 0; + + virtual Position movePrevious(Context& context, const Position& position) const = 0; + + virtual Position moveAbsolute(Context& context, const Position& position) const = 0; + + virtual Position moveFirst(Context& context, const uint32_t partition) const = 0; + + virtual Position moveLast(Context& context, const uint32_t partition) const = 0; + + virtual uint32_t recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const = 0; + + virtual bool readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const = 0; + + virtual bool readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const = 0; + + virtual bool updateData(Context& context, const Position& position, const uint8_t* buffer) = 0; + + // Comparison. + bool operator == (const Partition& right) const { + return serial_ == right.serial_; + } + + bool operator < (const Partition& right) const { + return serial_ < right.serial_; + } + + // Hash. + uint32_t hash() const { + return 31 + static_cast(serial_ ^ (serial_ >> 32)); + } +}; + +typedef SYSpSortedVector Partitions; +typedef SYSpSortedVector ChildPartitions; +typedef RefPtr PartitionGuard; +typedef SYSsortedVector ReferencedPartitions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionKey +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartitionKey : public Partition { +public: + + PartitionKey(const uint64_t serial) : Partition(serial, 0, 0, 0, 0) { + } + + void detach() override { + } + + TimePeriod getPeriod() const override { + return TimePeriod(); + } + + uint32_t getRecords() const override { + return 0; + } + + uint64_t getDataSize() const override { + return 0; + } + + uint64_t getIndexSize() const override { + return 0; + } + + bool isTransient() const override { + return true; + } + + bool isReady() const override { + return true; + } + + bool isIndexAlterable() const override { + return false; + } + + Position indexFind(Context& context, const uint32_t partition, const KeyValue& key, const SearchFlag searchFlag) const override { + return Position(partition); + } + + Position indexFirst(Context& context, const uint32_t partition) const override { + return Position(partition); + } + + Position indexLast(Context& context, const uint32_t partition) const override { + return Position(partition); + } + + Position indexNext(Context& context, const Position& position) const override { + return Position(position.getPartition()); + } + + Position indexPrevious(Context& context, const Position& position) const override { + return Position(position.getPartition()); + } + + Position moveNext(Context& context, const Position& position) const override { + return Position(position.getPartition()); + } + + 
Position movePrevious(Context& context, const Position& position) const override { + return Position(position.getPartition()); + } + + Position moveAbsolute(Context& context, const Position& position) const override { + return Position(position.getPartition()); + } + + Position moveFirst(Context& context, const uint32_t partition) const override { + return Position(partition); + } + + Position moveLast(Context& context, const uint32_t partition) const override { + return Position(partition); + } + + uint32_t recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const override { + return 0; + } + + bool readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const override { + return false; + } + + bool readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const override { + return false; + } + + bool updateData(Context& context, const Position& position, const uint8_t* buffer) override { + return false; + } +}; + +} + +#endif /* #ifndef _engine_partition_h_ */ diff --git a/storage/sparrow/engine/persistent.cc b/storage/sparrow/engine/persistent.cc new file mode 100644 index 000000000000..fbd1d20bfa27 --- /dev/null +++ b/storage/sparrow/engine/persistent.cc @@ -0,0 +1,1125 @@ +/* + Persistent partition. +*/ + +#include "persistent.h" +#include "transient.h" +#include "context.h" +#include "fileutil.h" +#include "alter.h" +#include "coalescing.h" +#include "../handler/hasparrow.h" + +#include "../engine/log.h" + + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PersistentPartition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Partition version history: +// 0 Initial version, with built-in index_0 for timestamp. +// 1 No more timestamp index. +// 2 Append to data files, coalesce index files, store strings in separate file. +const uint32_t PersistentPartition::currentVersion_ = 2; + +const uint32_t PersistentPartition::appendVersion_ = 2; + +PersistentPartition::~PersistentPartition() { + SPARROW_ENTER("PersistentPartition::~PersistentPartition"); + const Master* master = master_.get(); + if (master != 0) { + DBUG_PRINT("sparrow_purge", ("Destroying persistent partition %s.%s.%llu", + master->getDatabase().c_str(), master->getTable().c_str(), static_cast(getSerial()))); + uint32_t nbIndexes; + { + ReadGuard guard(master->getLock()); + nbIndexes = master->getIndexes().length(); + } + + try { + // Remove partition files. + // Note the files may have been deleted before, so silently ignore errors. + char name[FN_REFLEN]; + bool deleteIndexFiles = true; + if (getVersion() >= PersistentPartition::appendVersion_) { + if (isMain()) { + FileCache::releaseFile(FileId(getFileName(DATA_FILE, name), FILE_TYPE_DATA, FILE_MODE_READ), true); + FileCache::releaseFile(FileId(getFileName(STRING_FILE, name), FILE_TYPE_STRING, FILE_MODE_READ), true); + deleteIndexFiles = false; + } + } else { + FileCache::releaseFile(FileId(getFileName(DATA_FILE, name), FILE_TYPE_DATA, FILE_MODE_READ), true); + } + if (deleteIndexFiles) { + for (uint32_t i = 0; i < nbIndexes; ++i) { + FileCache::releaseFile(FileId(getFileName(i, name), FILE_TYPE_INDEX, FILE_MODE_READ), true); + } + } + + // Try to remove the parent directories (will remove only if it is empty). 
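+      // rmdir() simply fails when the directory is not empty, which is the desired behavior here.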
+ char parentHour[FN_REFLEN]; + rmdir(FileUtil::getParent(name, parentHour)); + char parentDay[FN_REFLEN]; + rmdir(FileUtil::getParent(parentHour, parentDay)); + } catch(const SparrowException& e) { + e.toLog(); + } + } +} + +// Create a partition reader. Need read or write lock on master file! +PartitionReader* PersistentPartition::createReader(const uint32_t index, const bool isString, const BlockCacheHint& hint) const _THROW_(SparrowException) { + if (getVersion() >= PersistentPartition::appendVersion_) { + if (isString) { + return new PartitionReader(*getMainPartition(), STRING_FILE, hint); + } else if (index == DATA_FILE && !isMain()) { + return new PartitionReader(*getMainPartition(), index, hint); + } + } + return new PartitionReader(*this, getFileId(index, isString), hint); +} + +FileHeaderBase* PersistentPartition::readHeader(const uint32_t fileId, FileReader& reader) const _THROW_(SparrowException) { + FileHeaderBase* header = 0; + bool ok = true; + try { + header = readHeader2(fileId, reader); + } catch(const SparrowException& e) { + if (fileId == DATA_FILE || fileId == STRING_FILE) + throw; // Cannot repair + if (!sparrow_auto_partition_repair) + throw; // Automatic repair is disabled + spw_print_warning("%s", e.getText()); + ok = false; + } + + if (!ok) { + char name[FN_REFLEN]; + reader.getFileName(name); + spw_print_information("Rebuilding %s...", name); + reader.close(true); + try { + const_cast(this)->rebuildIndex(fileId); + } catch(const SparrowException& e) { + MasterRepairTask* repairTask = new MasterRepairTask(master_.get()); + Scheduler::addTask(repairTask, Scheduler::now(), true); + throw SparrowException::create(false, "Failed to rebuild %s: %s", name, e.getText()); + } + reader.open(); + spw_print_information("Rebuilt %s successfully.", name); + reader.seek(0); + header = readHeader2(fileId, reader); + } + return header; +} + +FileHeaderBase* PersistentPartition::readHeader2(const uint32_t fileId, FileReader& reader) const _THROW_(SparrowException) { + try { + FileHeaderBase* header = 0; + if (getVersion() >= PersistentPartition::appendVersion_) { + if (fileId == DATA_FILE) { + DataFileHeader* h = new DataFileHeader(); + reader >> *h; + header = h; + } else if (fileId == STRING_FILE) { + StringFileHeader* h = new StringFileHeader(); + reader >> *h; + header = h; + } else { + IndexFileHeader* h = new IndexFileHeader(); + reader >> *h; + header = h; + } + } else { + uint8_t dummy; + uint8_t format; + reader >> dummy >> format >> dummy >> dummy; + reader.setVersion(format); + if (format > 1) { + reader.seek(reader.getFileSize() - FileHeader::size(format)); + } + FileHeader* h = new FileHeader(); + reader >> *h; + if (fileId != DATA_FILE) { + if (format < 3) { + h->initialize(getMaster().getTreeNodeSize(fileId)); + } else { + h->initialize(); + } + } + header = h; + } + assert(header != 0); + return header; + } catch(const SparrowException& e) { + char name[FN_REFLEN]; + throw SparrowException::create(false, "Cannot read header of file %s: %s", reader.getFileName(name), e.getText()); + } +} + +template class BinarySearch; + +// Search tree for a given key. If necessary, use binary search on index records to refine the position. +Position PersistentPartition::searchTree(Context& context, const uint32_t partition, PartitionReader& reader, + PartitionReader& stringReader, const KeyValue& key, const SearchFlag searchFlag, const bool refine) const { + SPARROW_ENTER("PersistentPartition::searchTree"); + + // Note the tree cannot be empty. 
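+  // The index tree is stored as an implicit binary tree over distinct key values: node n has its
+  // left (smaller-key) child at 2n+1 and its right (greater-key) child at 2n+2, and each node
+  // carries the [start, end] interval of index rows sharing its key. The descent below keeps the
+  // tightest known bracket (minNode/maxNode) around the search key; when the tree is incomplete,
+  // a binary search over the index records refines the final position.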
+ QueryInfo& queryInfo = context.getQueryInfo(); + const TableFields& fields = context.getShare().getMappedFields(); + const uint32_t index = queryInfo.getIndex(); + TABLE& table = context.getTable(); + const bool stopOnFirstMatch = queryInfo.stopOnFirstMatch(key); + const RecordWrapper& treeReader = context.getRecordWrapper(reader.getColumnAlterSerial(), index, true); + uint8_t* curKey = queryInfo.getCurrentKey().getKey(); + uint8_t* minKey = queryInfo.getMinKey().getKey(); + uint8_t* maxKey = queryInfo.getMaxKey().getKey(); + const uint32_t keyLength = queryInfo.getKeyLength(); + uint32_t minNode = UINT_MAX; + uint32_t minStart = UINT_MAX; + uint32_t minEnd = UINT_MAX; + uint32_t maxNode = UINT_MAX; + uint32_t maxStart = UINT_MAX; + uint32_t maxEnd = UINT_MAX; + bool exactMatch = false; + uint32_t node = 0; + const FileHeaderBase& header = reader.getHeader(); + const uint32_t nodes = header.getNodes(); + const bool treeComplete = header.isTreeComplete(); + while (node < nodes) { + reader.seekTree(node); + uint32_t start; // TODO row size + uint32_t end; + reader >> start >> end; + treeReader.readUsingTableBitmap(table, reader, stringReader, true, curKey, true); + const int cmp = queryInfo.compareKeys(fields, queryInfo.getCurrentKey(), key); + if (cmp == 0) { // Node is equal to key. + exactMatch = true; + if (searchFlag == SearchFlag::EQ || searchFlag == SearchFlag::GE + || searchFlag == SearchFlag::LE || searchFlag == SearchFlag::LE_LAST) { + if (stopOnFirstMatch) { + minNode = node; + minStart = start; + minEnd = end; + maxNode = node; + maxStart = start; + maxEnd = end; + break; + } else if (treeComplete) { + memcpy(minKey, curKey, keyLength); + minNode = node; + minStart = start; + minEnd = end; + memcpy(maxKey, curKey, keyLength); + maxNode = node; + maxStart = start; + maxEnd = end; + if (searchFlag == SearchFlag::LE_LAST) { + // Goto right (greater) child. + node = node * 2 + 2; + } else { + // Goto left (smaller) child. + node = node * 2 + 1; + } + } else { + if (searchFlag == SearchFlag::LE_LAST) { + memcpy(minKey, curKey, keyLength); + minNode = node; + minStart = start; + minEnd = end; + + // Goto right (greater) child. + node = node * 2 + 2; + } else { + memcpy(maxKey, curKey, keyLength); + maxNode = node; + maxStart = start; + maxEnd = end; + + // Goto left (smaller) child. + node = node * 2 + 1; + } + } + } else if (searchFlag == SearchFlag::GT) { + memcpy(minKey, curKey, keyLength); + minNode = node; + minStart = start; + minEnd = end; + + // Goto right (greater) child. + node = node * 2 + 2; + } else if (searchFlag == SearchFlag::LT) { + memcpy(maxKey, curKey, keyLength); + maxNode = node; + maxStart = start; + maxEnd = end; + + // Goto left (smaller) child. + node = node * 2 + 1; + } + } else if (cmp > 0) { // Node is greater than key. + // Check if node is smaller than maxKey. + if (maxStart == UINT_MAX || queryInfo.compareKeys(fields, queryInfo.getCurrentKey(), queryInfo.getMaxKey()) <= 0) { + memcpy(maxKey, curKey, keyLength); + maxNode = node; + maxStart = start; + maxEnd = end; + } + // Goto left (smaller) child. + node = node * 2 + 1; + } else if (cmp < 0) { // Node is smaller than key. + // Check if node is larger than minKey. + if (minStart == UINT_MAX || queryInfo.compareKeys(fields, queryInfo.getCurrentKey(), queryInfo.getMinKey()) >= 0) { + memcpy(minKey, curKey, keyLength); + minNode = node; + minStart = start; + minEnd = end; + } + // Goto right (greater) child. 
+ node = node * 2 + 2; + } + } + uint32_t dataRow = 0; + uint32_t indexRow = UINT_MAX; + uint32_t indexStart = UINT_MAX; + uint32_t indexEnd = UINT_MAX; + uint32_t treeNode = UINT_MAX; + if (exactMatch && stopOnFirstMatch) { + // Exact match found. + if (searchFlag == SearchFlag::EQ || searchFlag == SearchFlag::GE + || searchFlag == SearchFlag::LE || searchFlag == SearchFlag::LE_LAST) { + indexRow = searchFlag == SearchFlag::LE_LAST ? minEnd : minStart; + indexStart = minStart; + indexEnd = minEnd; + treeNode = minNode; + } else if (searchFlag == SearchFlag::GT) { + if (minEnd + 1 == header.getRecords()) { + return Position(partition); + } else { + indexRow = minEnd + 1; + indexStart = indexRow; + if (indexRow == maxStart) { + // Contiguous intervals: can set index end and tree node. + indexEnd = maxEnd; + treeNode = maxNode; + } + } + } else if (searchFlag == SearchFlag::LT) { + if (maxStart == 0) { + return Position(partition); + } else { + indexRow = maxStart - 1; + indexEnd = indexRow; + if (indexRow == minEnd) { + // Contiguous intervals: can set index start and tree node. + indexStart = minStart; + treeNode = minNode; + } + } + } + } else if (treeComplete) { + // Tree is complete. + if (exactMatch) { + // Exact match. + if (searchFlag == SearchFlag::EQ || searchFlag == SearchFlag::GE || searchFlag == SearchFlag::GT) { + if (maxStart == UINT_MAX) { + return Position(partition); + } else { + indexRow = maxStart; + indexStart = maxStart; + indexEnd = maxEnd; + treeNode = maxNode; + } + } else if (searchFlag == SearchFlag::LE || searchFlag == SearchFlag::LE_LAST + || searchFlag == SearchFlag::LT) { + if (minEnd == UINT_MAX) { + return Position(partition); + } else { + indexRow = minEnd; + indexStart = minStart; + indexEnd = minEnd; + treeNode = minNode; + } + } + } else { + // No exact match. + if (searchFlag == SearchFlag::EQ) { + return Position(partition); + } else if (searchFlag == SearchFlag::GE || searchFlag == SearchFlag::GT) { + if (maxStart == UINT_MAX) { + return Position(partition); + } else { + indexRow = maxStart; + indexStart = maxStart; + indexEnd = maxEnd; + treeNode = maxNode; + } + } else if (searchFlag == SearchFlag::LE || searchFlag == SearchFlag::LE_LAST + || searchFlag == SearchFlag::LT) { + if (minEnd == UINT_MAX) { + return Position(partition); + } else { + indexRow = minEnd; + indexStart = minStart; + indexEnd = minEnd; + treeNode = minNode; + } + } + } + } else { + // Tree is not complete. + // Use binary search on index records to refine position. + const RecordWrapper& recordWrapper = context.getRecordWrapper(reader.getColumnAlterSerial(), index, false); + ComparatorPersistent comparator(context, recordWrapper, reader, stringReader, key, curKey); + const uint32_t bsStart = (minEnd == UINT_MAX) ? 0 : minEnd; + const uint32_t bsEnd = (maxStart == UINT_MAX) ? 
static_cast(header.getRecords()) - 1 : maxStart; + indexRow = BinarySearch::find(comparator, bsStart, bsEnd - bsStart + 1, searchFlag); + if (indexRow == UINT_MAX) { + return Position(partition); + } + } + if (refine) { + reader.seekRecord(indexRow); + reader >> dataRow; + } + return Position(partition, dataRow, indexRow, indexStart, indexEnd, treeNode); +} + +Position PersistentPartition::indexFind(Context& context, const uint32_t partition, const KeyValue& key, const SearchFlag searchFlag) const { + SPARROW_ENTER("PersistentPartition::indexFind"); + try { + QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReaderGuard stringReaderGuard(context, partition, index, true, BlockCacheHint::mediumAround2_); + return searchTree(context, partition, readerGuard.get(), stringReaderGuard.get(), key, searchFlag, true); + } catch(const SparrowException& e) { + e.toLog(); + return Position(partition); + } +} + +Position PersistentPartition::indexFirst(Context& context, const uint32_t partition) const { + SPARROW_ENTER("PersistentPartition::indexFirst"); + try { + // Use smallest node in tree (guaranteed to be the smallest index value). + const QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + uint32_t node; + uint32_t start; + uint32_t end; + { + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReader& reader = readerGuard.get(); + node = reader.getHeader().getMinNode(); + reader.seekTree(node); + reader >> start >> end; + } + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReader& reader = readerGuard.get(); + reader.seekRecord(start); + uint32_t row; + reader >> row; + return Position(partition, row, start, start, end, node); + } catch(const SparrowException& e) { + e.toLog(); + return Position(partition); + } +} + +Position PersistentPartition::indexLast(Context& context, const uint32_t partition) const { + SPARROW_ENTER("PersistentPartition::indexLast"); + try { + // Use largest node in tree (guaranteed to be the largest index value). 
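+    // Symmetric to indexFirst(): locate the maximum tree node, then position on the last
+    // row (end) of its [start, end] interval.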
+ const QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + uint32_t node; + uint32_t start; + uint32_t end; + { + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReader& reader = readerGuard.get(); + node = reader.getHeader().getMaxNode(); + reader.seekTree(node); + reader >> start >> end; + } + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeBackward1_); + PartitionReader& reader = readerGuard.get(); + reader.seekRecord(end); + uint32_t row; + reader >> row; + return Position(partition, row, end, start, end, node); + } catch(const SparrowException& e) { + e.toLog(); + return Position(partition); + } +} + +Position PersistentPartition::indexNext(Context& context, const Position& position) const { + SPARROW_ENTER("PersistentPartition::indexNext"); + const uint32_t partition = position.getPartition(); + if (!position.isValid() || !position.hasIndexHint()) { + return Position(partition); + } + try { + const QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + uint32_t indexRow = position.getIndexHint(); + uint32_t start = position.getStartHint(); + uint32_t end = position.getEndHint(); + uint32_t node = position.getTreeHint(); + const uint32_t records = getRecords(); + if (indexRow + 1 < records) { + indexRow++; + if (node != UINT_MAX && indexRow > end) { + // The interval hint is no longer valid and the tree is complete; find next node in tree. + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeAround1_); + PartitionReader& reader = readerGuard.get(); + const FileHeaderBase& header = reader.getHeader(); + if (header.isTreeComplete()) { + node = header.getNextNode(node); + reader.seekTree(node); + reader >> start >> end; + } else { + start = UINT_MAX; + end = UINT_MAX; + node = UINT_MAX; + } + } + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReader& reader = readerGuard.get(); + reader.seekRecord(indexRow); + uint32_t row; + reader >> row; + return Position(partition, row, indexRow, start, end, node); + } else { + return Position(partition); + } + } catch(const SparrowException& e) { + e.toLog(); + return Position(partition); + } +} + +Position PersistentPartition::indexPrevious(Context& context, const Position& position) const { + SPARROW_ENTER("PersistentPartition::indexPrevious"); + const uint32_t partition = position.getPartition(); + if (!position.isValid() || !position.hasIndexHint()) { + return Position(partition); + } + try { + const QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + uint32_t indexRow = position.getIndexHint(); + uint32_t start = position.getStartHint(); + uint32_t end = position.getEndHint(); + uint32_t node = position.getTreeHint(); + if (indexRow > 0) { + indexRow--; + if (node != UINT_MAX && indexRow < start) { + // The interval hint is no longer valid and the tree is complete; find previous node in tree. 
+ PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeAround1_); + PartitionReader& reader = readerGuard.get(); + const FileHeaderBase& header = reader.getHeader(); + if (header.isTreeComplete()) { + node = header.getPrevNode(node); + reader.seekTree(node); + reader >> start >> end; + } else { + start = UINT_MAX; + end = UINT_MAX; + node = UINT_MAX; + } + } + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeBackward1_); + PartitionReader& reader = readerGuard.get(); + reader.seekRecord(indexRow); + uint32_t row; + reader >> row; + return Position(partition, row, indexRow, start, end, node); + } else { + return Position(partition); + } + } catch(const SparrowException& e) { + e.toLog(); + return Position(partition); + } +} + +Position PersistentPartition::moveNext(Context& context, const Position& position) const { + SPARROW_ENTER("PersistentPartition::moveNext"); + const uint32_t partition = position.getPartition(); + const RowNumber row = position.getRow() + 1; + if (row < static_cast(getRecordOffset() + getRecords())) { + return Position(partition, row); + } else { + return Position(partition); + } +} + +Position PersistentPartition::movePrevious(Context& context, const Position& position) const { + SPARROW_ENTER("PersistentPartition::movePrevious"); + const uint32_t partition = position.getPartition(); + const RowNumber row = position.getRow(); + if (row == static_cast(getRecordOffset())) { + return Position(partition); + } else { + return Position(partition, row - 1); + } +} + +Position PersistentPartition::moveAbsolute(Context& context, const Position& position) const { + SPARROW_ENTER("PersistentPartition::moveAbsolute"); + return position; +} + +Position PersistentPartition::moveFirst(Context& context, const uint32_t partition) const { + SPARROW_ENTER("PersistentPartition::moveFirst"); + return Position(partition, static_cast(getRecordOffset())); +} + +Position PersistentPartition::moveLast(Context& context, const uint32_t partition) const { + SPARROW_ENTER("PersistentPartition::moveLast"); + PartitionReaderGuard readerGuard(context, partition, DATA_FILE, false, BlockCacheHint::largeBackward2_); + PartitionReader& reader = readerGuard.get(); + const FileHeaderBase& header = reader.getHeader(); + return Position(partition, static_cast(getRecordOffset() + header.getRecords() - 1)); +} + +// Use tree to count records in range. The result may be overestimated. +uint32_t PersistentPartition::recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const { + SPARROW_ENTER("PersistentPartition::recordsInRange"); + try { + QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + uint32_t minRow = UINT_MAX; + uint32_t maxRow = UINT_MAX; + if (minKey == 0) { + minRow = 0; + } else { + const KeyValue minKeyValue(minKey); + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReaderGuard stringReaderGuard(context, partition, index, true, BlockCacheHint::mediumAround2_); + + // Min key flag is either HA_READ_AFTER_KEY or HA_READ_KEY_EXACT. + const Position pos = searchTree(context, partition, readerGuard.get(), stringReaderGuard.get(), minKeyValue, + minKey->flag == HA_READ_KEY_EXACT ? 
SearchFlag::GE : SearchFlag::GT, false); + minRow = pos.getIndexHint(); + } + if (maxKey == 0) { + maxRow = getRecords() - 1; + } else { + const KeyValue maxKeyValue(maxKey); + PartitionReaderGuard readerGuard(context, partition, index, false, BlockCacheHint::largeForward1_); + PartitionReaderGuard stringReaderGuard(context, partition, index, true, BlockCacheHint::mediumAround2_); + + // Max key flag is either HA_READ_BEFORE_KEY or HA_READ_AFTER_KEY. + const Position pos = searchTree(context, partition, readerGuard.get(), stringReaderGuard.get(), maxKeyValue, + maxKey->flag == HA_READ_AFTER_KEY ? SearchFlag::LE_LAST : SearchFlag::LT, false); + maxRow = pos.getIndexHint(); + } + if (minRow != UINT_MAX && maxRow != UINT_MAX && minRow <= maxRow) { + return maxRow - minRow + 1; + } else { + return 0; + } + } catch(const SparrowException& e) { + e.toLog(); + return 0; + } +} + +// Reads a given index record and sets MySQL fields. +bool PersistentPartition::readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const { + SPARROW_ENTER("PersistentPartition::readKey"); + if (!position.hasIndexHint()) { + return false; + } + const QueryInfo& queryInfo = context.getQueryInfo(); + const uint32_t index = queryInfo.getIndex(); + const bool useTree = position.hasTreeHint(); + const BlockCacheHint& hint = useTree ? BlockCacheHint::largeForward1_ : (forward ? BlockCacheHint::largeForward1_ : BlockCacheHint::largeBackward1_); + PartitionReaderGuard readerGuard(context, position.getPartition(), index, false, hint); + PartitionReaderGuard stringReaderGuard(context, position.getPartition(), index, true, BlockCacheHint::mediumAround2_); + PartitionReader& reader = readerGuard.get(); + const RecordWrapper& recordWrapper = context.getRecordWrapper(getColumnAlterSerial(), index, useTree); + if (useTree) { + reader.seekTreeData(position.getTreeHint()); + } else { + reader.seekRecordData(position.getIndexHint()); + } + recordWrapper.readUsingKeyPartMap(reader, stringReaderGuard.get(), keyPartMap, buffer, keyFormat); + return true; +} + +// Reads a given data record and sets MySQL fields. +bool PersistentPartition::readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const { + SPARROW_ENTER("PersistentPartition::readData"); + assert(position.isValid()); + QueryInfo& queryInfo = context.getQueryInfo(); + + // Reset NULL flags. 
+ TABLE& table = context.getTable(); + memset(buffer, 0, table.s->null_bytes); + if (queryInfo.isCoveringIndex() && position.hasIndexHint()) { + return readKey(context, position, true, queryInfo.getDataMap(), buffer, false); + } else { + PartitionReaderGuard readerGuard(context, position.getPartition(), DATA_FILE, false, hint); + PartitionReader& reader = readerGuard.get(); + reader.seekRecord(position.getRow()); + const RecordWrapper* recordWrapper = NULL; + if (skippedColumnIds_.isEmpty()) { + recordWrapper = &context.getRecordWrapper(getColumnAlterSerial(), DATA_FILE, false); + } else { + recordWrapper = &context.getRecordWrapper(getColumnAlterSerial(), mainPartition_->getSerial(), getSkippedColumns()); + } + //const RecordWrapper& recordWrapper = context.getRecordWrapper(getColumnAlterSerial(), DATA_FILE, false); + PartitionReaderGuard stringReaderGuard(context, position.getPartition(), DATA_FILE, true, BlockCacheHint::mediumAround2_); + recordWrapper->readUsingTableBitmap(table, reader, stringReaderGuard.get(), false, buffer, false); + return true; + } +} + + +bool PersistentPartition::updateData(Context& context, const Position& position, const uint8_t* buffer) { + SPARROW_ENTER("PersistentPartition::updateData"); + assert(position.isValid()); + + // Read existing record into memory. + PartitionReaderGuard readerGuard(context, position.getPartition(), DATA_FILE, false, BlockCacheHint::smallAround2_); + PartitionReader& reader = readerGuard.get(); + const uint64_t offset = reader.seekRecord(position.getRow()); + const RecordWrapper& recordWrapper = context.getRecordWrapper(getColumnAlterSerial(), DATA_FILE, false); + const uint32_t recordSize = recordWrapper.getSize(); + ByteBuffer data(static_cast(IOContext::getTempBuffer1(recordSize)), recordSize); + reader >> data; + data.position(recordWrapper.getBitSize()); + data.limit(recordSize); + uint8_t* newRecord = data.getData(); + + // Update record. + const TableFields& fields = recordWrapper.getFields(); + const uint32_t n = fields.length(); + uint32_t col = 0; + uint32_t bitOffset = 0; + for (uint32_t i = 0; i < n; ++i) { + const FieldBase& field = *fields[i]; + if (!field.isMapped()) { + bitOffset += field.getBits(); + continue; + } + if (!context.isUpdatableColumn(col++)) { + data.advance(field.getSize()); + bitOffset += field.getBits(); + continue; + } + if (field.readMySqlPersistent(buffer, data)) { + newRecord[bitOffset / 8] |= (1 << (bitOffset % 8)); + } else if (field.isNullable()) { + newRecord[bitOffset / 8] &= ~(1 << (bitOffset % 8)); + } + bitOffset += field.getBits(); + } + + // Write modified record to data file. + char filename[FN_REFLEN]; + mainPartition_->getFileName(DATA_FILE, filename); + const PartitionFile partitionFile(*mainPartition_, DATA_FILE); + const SimpleWriteCacheHint writeHint(partitionFile, 3); + FileWriter writer(filename, FILE_TYPE_DATA, FILE_MODE_UPDATE, &writeHint, offset, recordSize); + data.position(0); + data.limit(recordSize); + writer << data; + writer.write(); + return true; +} + + +// Alter this persistent partition so it fits the current table/index definition. +// If task is null, alterations are performed synchronously. 
+// Otherwise, alteration tasks are sent asynchronously to the alteration queue +void PersistentPartition::alter(const Task* task) _THROW_(SparrowException) { + SPARROW_ENTER("PersistentPartition::alter"); + Alterations alterations; + uint32_t newSerial = 0; + { + ReadGuard guard(master_->getLock()); + newSerial = master_->getIndexAlterSerial(); + const Alterations& masterAlterations = master_->getIndexAlterations(); + for (uint32_t i = 0; i < masterAlterations.length(); ++i) { + const Alteration& masterAlteration = masterAlterations[i]; + if (masterAlteration.getSerial() > getIndexAlterSerial()) { + alterations.append(masterAlteration); + } + } + } + + // Optimize alterations: create + drop = NOP (but keep drop + create). + for (uint32_t i = 0; i < alterations.length(); ++i) { + const Alteration& alteration = alterations[i]; + if (alteration.getType() == ALT_ADD_INDEX) { + const uint32_t id = alteration.getId(); + for (uint32_t j = i + 1; j < alterations.length(); ++j) { + const Alteration& nextAlteration = alterations[j]; + if (nextAlteration.getType() == ALT_DROP_INDEX && nextAlteration.getId() == id) { + alterations.removeAt(j); + alterations.removeAt(i--); // Wrapping. + break; + } + } + } + } +#ifndef NDEBUG + if (!alterations.isEmpty()) { + DBUG_PRINT("sparrow_alter", ("Altering partition %s.%s.%llu", master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + } +#endif + if (task == 0) { + DBUG_PRINT("sparrow_alter", ("synchronous alteration")); + AlterationStats stats; + for (uint32_t i = 0; i < alterations.length(); ++i) { + const Alteration& alteration = alterations[i]; + const AlterationType type = alteration.getType(); + try { + if (type == ALT_ADD_INDEX) { + stats += createIndex(alteration.getId(), task); + } else if (type == ALT_DROP_INDEX) { + stats += dropIndex(alteration.getId()); + } + } catch(const SparrowException& e) { + e.toLog(); + } + } + setIndexAlterSerial(newSerial); + dataSize_ += stats.getDeltaDataSize(); + indexSize_ += stats.getDeltaIndexSize(); + + WriteGuard guard(master_->getLock()); + master_->resetCoalescingTimestamp(); + } else { + uint32_t* counter = new uint32_t; + *counter = alterations.length(); + AlterationStats* stats = new AlterationStats(); + for (uint32_t i = 0; i < alterations.length(); ++i) { + [[maybe_unused]] const Alteration& alteration = alterations[i]; + DBUG_PRINT("sparrow_alter", ("addTask for partition %s.%s.%llu, type %d, alter Id %d, alter serial %d, counter %u", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), + alteration.getType(), alteration.getId(), alteration.getSerial(), *counter)); + Scheduler::addTask(new AlterTask(counter, stats, newSerial, this, alterations[i])); + } + } +} + +// This method returns the next persistent partition to alter (older), or 0 if none. +PersistentPartitionGuard PersistentPartition::alterationDone(const AlterationStats& stats, const uint32_t newIndexAlterSerial) { + SPARROW_ENTER("PersistentPartition::alterationDone"); + DBUG_PRINT("sparrow_alter", ("Partition %llu, setting IndexAlterSerial from %u to %u", static_cast(getSerial()), indexAlterSerial_, newIndexAlterSerial)); + + setIndexAlterSerial(newIndexAlterSerial); + dataSize_ += stats.getDeltaDataSize(); + indexSize_ += stats.getDeltaIndexSize(); + { + // Update master file. 
+ WriteGuard guard(master_->getLock()); + master_->setDataSize(master_->getDataSize() + stats.getDeltaDataSize()); + master_->setIndexSize(master_->getIndexSize() + stats.getDeltaIndexSize()); + master_->indexAlterationDone(); + master_->resetCoalescingTimestamp(); + master_->toDisk(); + } + DBUG_PRINT("sparrow_alter", ("Updated Master file. Find next partition (older)")); + + // Find next partition (older). + { + ReadGuard guard(master_->getLock()); + const Partitions& partitions = master_->getPartitions(); + uint32_t i; + partitions.bsearch(*this, i, 2); + for (;;) { + if (i == partitions.length()) { + if (i == 0) { + break; + } + i--; + } + Partition* partition = partitions[i]; + if (partition->getSerial() < getSerial() && partition->isIndexAlterable() && !partition->isReady() && !partition->isTemporary()) { + DBUG_PRINT("sparrow_alter", ("Next older partition: %llu", static_cast(partition->getSerial()))); + return PersistentPartitionGuard(static_cast(partition)); + } + if (i-- == 0) { + break; + } + } + } + + // Nothing found, but check if alter needs to be restarted, in case alterations were stacked meanwhile. + DBUG_PRINT("sparrow_alter", ("Nothing found for %s.%s.%llu. Check if alter need to be restarted.", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + if (!master_->startIndexAlter(true)) { + // No more alteration: start coalescing, if necessary. + DBUG_PRINT("sparrow_alter", ("No more alteration: start coalescing, if necessary")); + master_->coalesce(); + } + return PersistentPartitionGuard(); +} + +template class Sort; + +AlterationStats PersistentPartition::createIndex(const uint32_t index, const Task* task) _THROW_(SparrowException) { + SPARROW_ENTER("PersistentPartition::createIndex"); +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); + Str descr; +#endif + DBUG_PRINT("sparrow_alter", ("Creating index %u", index)); + TableFieldsGuard fieldsGuard; + TableFields& fields = fieldsGuard.get(); + ColumnIds columnIds; + { + ReadGuard guard(master_->getLock()); + master_->getFields(getColumnAlterSerial(), false, fields, &getSkippedColumns()); + columnIds = master_->getIndexes()[index].getColumnIds(); +#ifndef NDEBUG + descr = master_->getIndexes()[index].getName(); + descr += Str("("); + for (uint32_t i = 0; i < columnIds.length(); ++i) { + if (i > 0) { + descr += Str(", "); + } + descr += master_->getColumns()[columnIds[i]].getName(); + } + descr += Str(")"); +#endif + } + DBUG_PRINT("sparrow_alter", ("Adding index %s on partition %s.%s.%llu", descr.c_str(), + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + UpdateGuard updateGuard(*master_); + uint64_t totalSize = 0; + const uint64_t threshold = sparrow_cache0_size / 5; // 20% of cache level 0. + const uint64_t dataSize = getMainPartition()->getDataSize(); + DBUG_PRINT("sparrow_alter", ("Data size %llu > %llu ?", static_cast(dataSize), static_cast(threshold))); + if (dataSize > threshold) { + // If data file is too large, create multiple index files and coalesce them. + // To do so, create temporary persistent partitions and coalesce them. 
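+    // Each temporary partition is sized so that one sort pass covers roughly threshold/dataSize
+    // of the rows, i.e. about 20% of cache level 0 worth of data at a time.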
+ const uint32_t rows = 1 + static_cast((getRecords() * threshold) / dataSize); + PersistentPartitions partitions(1 + getRecords() / rows); + for (uint32_t offset = 0; offset < getRecords(); offset += rows) { + const uint32_t records = std::min(rows, getRecords() - offset); + const uint64_t recordOffset = getRecordOffset() + offset; + DBUG_PRINT("sparrow_alter", ("Creating new temp persistent partition for %u records at offset %llu", records, static_cast(recordOffset))); + PersistentPartition* temporary = master_->newTemporaryPersistentPartition(*this, records, recordOffset); + DBUG_PRINT("sparrow_alter", ("Creating corresponding index file")); + temporary->createIndexFile(index, task, fields, columnIds, static_cast(recordOffset), records); + DBUG_PRINT("sparrow_alter", ("Appending new partition")); + partitions.append(PersistentPartitionGuard(temporary)); + } + DBUG_PRINT("sparrow_alter", ("Generating index file")); + totalSize = Coalescing::generateIndexFile(partitions, index, this, task); + DBUG_PRINT("sparrow_alter", ("calling coalescingDone")); + master_->coalescingDone(this, partitions); + } else { + // Data file is not too large, create index file in one pass. + DBUG_PRINT("sparrow_alter", ("calling createIndexFile")); + totalSize = createIndexFile(index, task, fields, columnIds, static_cast(getRecordOffset()), getRecords()); + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_alter", ("Created index %u for partition %s.%s.%llu in %s", index, + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); +#endif + return AlterationStats(0, static_cast(totalSize)); +} + +void PersistentPartition::rebuildIndex(const uint32_t index) _THROW_(SparrowException) { + // Remove the corrupted index file, then rebuild the file. + char filename[FN_REFLEN]; + getFileName(index, filename); + FileId fileId(filename, FILE_TYPE_INDEX, FILE_MODE_READ); + FileCache::releaseFile(fileId, true); + + createIndex(index, NULL); +} + +uint64_t PersistentPartition::createIndexFile(const uint32_t index, const Task* task, + const TableFields& fields, const ColumnIds& columnIds, const uint32_t startRow, const uint32_t rows) _THROW_(SparrowException) { + Indirector indirector; + SYSxvector count; + { + PartitionReaderGuard guard1(*this, DATA_FILE, false, BlockCacheHint::largeAround0_); + PartitionReader& reader1 = guard1.get(); + PartitionReaderGuard stringGuard1(*this, DATA_FILE, true, BlockCacheHint::largeAround0_); + PartitionReader& stringReader1 = stringGuard1.get(); + PartitionReaderGuard guard2(*this, DATA_FILE, false, BlockCacheHint::largeAround0_); + PartitionReader& reader2 = guard2.get(); + PartitionReaderGuard stringGuard2(*this, DATA_FILE, true, BlockCacheHint::largeAround0_); + PartitionReader& stringReader2 = stringGuard2.get(); + const AlterComparator comparator(task, fields, columnIds, getSkippedColumns(), reader1, stringReader1, reader2, stringReader2); + for (uint32_t i = startRow; i < startRow + rows; ++i) { + indirector.append(i); + } + DBUG_PRINT("sparrow_alter", ("Quick sort of %u rows", rows)); + Sort::quickSort(indirector, comparator, 0, rows); + + // Count distinct values. 
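+    // After the quick sort, consecutive rows with equal keys form a run; 'count' records the
+    // length of each run so runs can later be emitted as tree nodes covering contiguous
+    // [start, end] index-row intervals.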
+ DBUG_PRINT("sparrow_alter", ("Counting distinct values")); + uint32_t start = 0; + uint32_t previousRow = 0; + for (uint32_t row = 0; row < rows; ++row) { + if (task != 0 && (row % 16384) == 0 && task->isStopping()) { + return 0; + } + const uint32_t currentRow = indirector[row]; + if (row == 0) { + start = row; + } else { + const int cmp = comparator.compare(previousRow, currentRow, false); + assert(cmp <= 0); + if (cmp != 0) { + count.append(row - start); + start = row; + } + } + previousRow = currentRow; + } + count.append(rows - start); + DBUG_PRINT("sparrow_alter", ("counted %u", rows - start)); + } + + // Generate index file from indirector. + PartitionReaderGuard guard(*this, DATA_FILE, false, BlockCacheHint::largeAround0_); + PartitionReaderGuard stringGuard(*this, DATA_FILE, true, BlockCacheHint::largeAround0_); + AlterWriter alterWriter(*this, fields, columnIds, getSkippedColumns(), guard.get(), stringGuard.get()); + const uint32_t nNodes = count.length(); + TreeNodes nodes(nNodes); + uint32_t start = 0; + for (uint32_t i = 0; i < nNodes; ++i) { + const uint32_t end = start + count[i]; + nodes.append(TreeNode(start, end - 1)); + start = end; + } + assert(nodes.length() == nNodes); + const uint32_t recordSize = 4; + const bool isAppend = getVersion() >= PersistentPartition::appendVersion_; + char filename[FN_REFLEN]; + DBUG_PRINT("sparrow_alter", ("Writing new file")); + FileWriter writer(getFileName(index, filename), FILE_TYPE_INDEX, FILE_MODE_CREATE); + if (isAppend) { + IndexFileHeader dummy; + writer << dummy; + } else { + // Write file format. + PersistentPartition::writeFileFormat(writer); + } + + // Write row numbers. + DBUG_PRINT("sparrow_alter", ("Writing row numbers (%u)", rows)); + for (uint32_t row = 0; row < rows; ++row) { + if (task != 0 && (row % 16384) == 0 && task->isStopping()) { + return 0; + } + writer << indirector[row]; + } + uint64_t offset = writer.getFileOffset(); + + // Write tree. + DBUG_PRINT("sparrow_alter", ("Writing tree (%u nodes)", nNodes)); + const TreeOrder& treeOrder = TreeOrder::get(nNodes); + for (uint32_t i = 0; i < nNodes; ++i) { + if (task != 0 && (i % 16384) == 0 && task->isStopping()) { + return 0; + } + const uint32_t inode = treeOrder.getListIndex(i, nNodes); + assert(inode < nNodes); + const TreeNode& node = nodes[inode]; + const uint32_t start = node.getStart(); + const uint32_t end = node.getEnd(); + writer << start << end; + alterWriter.writeRecord(writer, indirector[start]); + } + const uint64_t treeSize = writer.getFileOffset() - offset; + uint64_t totalSize; + if (isAppend) { + DBUG_PRINT("sparrow_alter", ("Writing header")); + // Write header. + writer.write(); + IndexFileHeader header(index, recordSize, rows, static_cast(treeSize / nNodes), nNodes, period_.getMin(), period_.getMax()); + writer.seek(0, header.size()); + writer << header; + totalSize = header.getTotalSize(); + } else { + // Write binary data. + DBUG_PRINT("sparrow_alter", ("Writing binary data")); + offset = writer.getFileOffset(); + writer << alterWriter.getBinBuffer(); + const uint64_t binSize = writer.getFileOffset() - offset; + + // Padding to put header at the end of the file, taking into account its adjusted size. + const FileHeader header(binSize, static_cast(treeSize), true, static_cast(treeSize / nNodes), + recordSize, rows, index, period_.getMin(), period_.getMax()); + const uint64_t target = header.getTotalSize() - FileHeader::size(); + while (writer.getFileOffset() < target) { + writer << static_cast(0); + } + + // Write header. 
+ DBUG_PRINT("sparrow_alter", ("Write header")); + writer << header; + totalSize = header.getTotalSize(); + } + DBUG_PRINT("sparrow_alter", ("Total size %llu", static_cast(totalSize))); + writer.write(); + return totalSize; +} + +AlterationStats PersistentPartition::dropIndex(const uint32_t index) { + SPARROW_ENTER("PersistentPartition::dropIndex"); +#ifndef NDEBUG + { + ReadGuard guard(master_->getLock()); + DBUG_PRINT("sparrow_alter", ("Removing index %s from partition %s.%s.%llu", master_->getIndexes()[index].getName().c_str(), + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + } +#endif + if (version_ == 0 && index == 0) { + return AlterationStats(); + } +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); +#endif + AlterationStats stats; + try { + PartitionReaderGuard guard(*this, index, false, BlockCacheHint::smallForward0_); + PartitionReader& reader = guard.get(); + const FileHeaderBase& header = reader.getHeader(); + stats = AlterationStats(0, -static_cast(header.getTotalSize())); + } catch (const SparrowException& e) { + // Ignore exception: it is usually a "file not found" error because we are + // dropping an index not yet completely created, so the index file is missing. + } + + char name[FN_REFLEN]; + FileCache::releaseFile(FileId(getFileName(index, name), FILE_TYPE_INDEX, FILE_MODE_READ), true); +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_alter", ("Deleted index %u for partition %s.%s.%llu in %s", index, + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); +#endif + return stats; +} + +bool PersistentPartition::makeChecks() const { + time_t start; + struct tm t; + start = static_cast(fileTime_ / 1000); + if (gmtime_r(&start, &t) == 0) { + return false; + } + return Master::checkForCorruption(t); +} + +} diff --git a/storage/sparrow/engine/persistent.h b/storage/sparrow/engine/persistent.h new file mode 100644 index 000000000000..807f6a10a56c --- /dev/null +++ b/storage/sparrow/engine/persistent.h @@ -0,0 +1,369 @@ +/* + Persistent partition. +*/ + +#ifndef _engine_persistent_h_ +#define _engine_persistent_h_ + +#include "search.h" +#include "fileutil.h" +#include "master.h" +#include "vec.h" +#include "../handler/hasparrow.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PersistentPartition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PersistentPartition : public Partition, public AbstractInterval { + friend ByteBuffer& operator >> (ByteBuffer& buffer, PersistentPartition& partition); + friend ByteBuffer& operator << (ByteBuffer& buffer, const PersistentPartition& partition); + +private: + + uint32_t version_; // See PersistentPartition::currentVersion_. + MasterGuard master_; + TimePeriod period_; // Partition period. May change if main partition. + uint64_t fileTime_; // File timestamp. Does not change after partition is created. + PersistentPartition* mainPartition_; + ChildPartitions childPartitions_; // List of child partitions. Populated only for main partitions. + uint32_t records_; + uint64_t dataSize_; + uint64_t indexSize_; + uint64_t dataRecords_; // Number of records in data file. + uint64_t recordOffset_; // Record offset in main partition. + ColumnIds skippedColumnIds_; // Skipped columns. Columns for which all values are NULL are not stored. 
+ +public: + + static const uint32_t currentVersion_; + + static const uint32_t appendVersion_; + +private: + + Position searchTree(Context& context, const uint32_t partition, PartitionReader& reader, + PartitionReader& stringReader, const KeyValue& key, const SearchFlag searchFlag, const bool refine) const; + + uint64_t createIndexFile(const uint32_t index, const Task* task, + const TableFields& fields, const ColumnIds& columnIds, const uint32_t startRow, const uint32_t rows) _THROW_(SparrowException); + + void rebuildIndex(const uint32_t index) _THROW_(SparrowException); + + FileHeaderBase* readHeader2(const uint32_t fileId, FileReader& reader) const _THROW_(SparrowException); + +public: + + PersistentPartition(const uint32_t version, Master* master, const uint64_t serial, PersistentPartition* mainPartition, + const uint32_t filesystem, const uint32_t indexAlterSerial, const uint32_t columnAlterSerial, const TimePeriod& period, + const uint32_t records, const uint64_t dataSize, const uint64_t indexSize, const uint64_t dataRecords, const uint64_t recordOffset, const ColumnIds& skippedColumns) + : Partition(serial, mainPartition == 0 ? serial : mainPartition->getSerial(), filesystem, indexAlterSerial, columnAlterSerial), version_(version), + master_(master), period_(period), fileTime_(period.getMin()), records_(records), dataSize_(dataSize), indexSize_(indexSize), + dataRecords_(dataRecords), recordOffset_(recordOffset), skippedColumnIds_(skippedColumns) { + assert(period_.getLow() != 0 && period_.getUp() != 0); + mainPartition_ = mainPartition == 0 ? this : mainPartition; + } + + // Deserialization constructor. + PersistentPartition(Master* master) : Partition(0, 0, 0, 0, 0), master_(master), mainPartition_(0) { + } + + ~PersistentPartition(); + + void detach() override { + master_ = 0; + } + + // Attributes. + + TimePeriod getPeriod() const override { + return period_; + } + + uint32_t getRecords() const override { + return records_; + } + + uint64_t getDataSize() const override { + return dataSize_; + } + + uint64_t getIndexSize() const override { + return indexSize_; + } + + bool isTransient() const override { + return false; + } + + bool isIndexAlterable() const override { + return !isMain() || getVersion() < PersistentPartition::appendVersion_; + } + + bool isReady() const override { + return master_->getIndexAlterSerial() == getIndexAlterSerial() || !isIndexAlterable(); + } + + const char* getFileName(uint32_t fileId, char* name) const { + if (version_ == 0 && fileId != DATA_FILE && fileId != STRING_FILE) { + --fileId; + } + return master_->getFileName(version_, getFilesystem(), TimePeriod(fileTime_, period_.getMax()), fileId, getSerial(), getDataSerial(), name); + } + + CoalescingInfo getCoalescingInfo() const { + return CoalescingInfo(Pair(getVersion(), getColumnAlterSerial()), getVersion() >= PersistentPartition::appendVersion_ ? getDataSerial() : 0); + } + + const ColumnIds& getSkippedColumns() const { + return skippedColumnIds_; + } + + bool makeChecks() const; + + + // Data access. 
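+  // Overrides of the Partition cursor API; implementations are in persistent.cc.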
+ + Position indexFind(Context& context, const uint32_t partition, const KeyValue& key, const SearchFlag searchFlag) const override; + + Position indexFirst(Context& context, const uint32_t partition) const override; + + Position indexLast(Context& context, const uint32_t partition) const override; + + Position indexNext(Context& context, const Position& position) const override; + + Position indexPrevious(Context& context, const Position& position) const override; + + Position moveNext(Context& context, const Position& position) const override; + + Position movePrevious(Context& context, const Position& position) const override; + + Position moveAbsolute(Context& context, const Position& position) const override; + + Position moveFirst(Context& context, const uint32_t partition) const override; + + Position moveLast(Context& context, const uint32_t partition) const override; + + uint32_t recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const override; + + bool readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const override; + + bool readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const override; + + bool updateData(Context& context, const Position& position, const uint8_t* buffer) override; + + // Implementation of AbstractInterval + uint64_t getMin() const override { + return *period_.getLow(); + } + + uint64_t getMax() const override { + return *period_.getUp(); + } + + int compareTo(const AbstractInterval& right) const override { + if (getMin() < right.getMin()) { + return -1; + } else if (getMin() > right.getMin()) { + return 1; + } else { + const uint64_t serial = getSerial(); + const uint64_t rightSerial = static_cast(right).getSerial(); + return serial == rightSerial ? 0 : (serial < rightSerial ? -1 : 1); + } + } + + // Alteration. 
+ + void alter(const Task* task) _THROW_(SparrowException); + + PersistentPartitionGuard alterationDone(const AlterationStats& stats, const uint32_t newIndexAlterSerial); + + AlterationStats createIndex(const uint32_t index, const Task* task) _THROW_(SparrowException); + + AlterationStats dropIndex(const uint32_t index); + + // Specific accessors + + uint32_t getVersion() const { + return version_; + } + + const Master& getMaster() const { + return *master_.get(); + } + + Master& getMaster() { + return *master_.get(); + } + + uint64_t getFileTime() const { + return fileTime_; + } + + void decRecords(const uint32_t records) { + records_ -= records; + } + + void addDataRecords(const uint32_t records) { + dataRecords_ += records; + } + + void setDataSize(const uint64_t dataSize) { + dataSize_ = dataSize; + } + + void addDataSize(const uint64_t dataSize) { + Atomic::add64(&dataSize_, dataSize); + } + + void addIndexSize(const uint64_t indexSize) { + Atomic::add64(&indexSize_, indexSize); + } + + uint64_t getDataRecords() const { + return dataRecords_; + } + + uint64_t getRecordOffset() const { + return recordOffset_; + } + + void addChildPartition(Partition* partition) { + assert(!childPartitions_.contains(partition)); + childPartitions_.insert(partition); + } + + void removeChildPartition(Partition* partition) { + [[maybe_unused]] Partition* p = childPartitions_.remove(partition); + assert(p != NULL); + } + + const ChildPartitions& getChildPartitions() const { + return childPartitions_; + } + + void extendPeriod(const TimePeriod& period) { + period_ = TimePeriod(std::min(period_.getMin(), period.getMin()), std::max(period_.getMax(), period.getMax())); + } + + static void writeFileFormat(ByteBuffer& buffer) _THROW_(SparrowException) { + buffer << static_cast(0) << FileHeader::currentFileFormat_ + << static_cast(0) << static_cast(0); + } + + PersistentPartition* getMainPartition() const { + return mainPartition_; + } + + void setMainPartition(PersistentPartition* mainPartition) { + assert(mainPartition != 0); + mainPartition_ = mainPartition; + } + + uint32_t getFileId(const uint32_t index, const bool isString) const { + return getVersion() >= PersistentPartition::appendVersion_ && isString ? STRING_FILE : index; + } + + PartitionReader* createReader(const uint32_t index, const bool isString, const BlockCacheHint& hint) const _THROW_(SparrowException); + + FileHeaderBase* readHeader(const uint32_t fileId, FileReader& reader) const _THROW_(SparrowException); +}; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, PersistentPartition& partition) { + const uint32_t version = buffer.getVersion(); + if (version < 13) { + partition.version_ = 0; + } else { + buffer >> partition.version_; + } + buffer >> partition.serial_; + if (version >= 20) { + buffer >> partition.dataSerial_; + buffer >> partition.dataRecords_; + buffer >> partition.recordOffset_; + } else { + partition.dataSerial_ = partition.serial_; + partition.recordOffset_ = 0; + } + buffer >> partition.period_; + if (version >= 20) { + buffer >> partition.fileTime_; + } else { + partition.fileTime_ = partition.period_.getMin(); + } + buffer >> partition.records_ + >> partition.dataSize_ >> partition.indexSize_; + if (version < 20) { + partition.dataRecords_ = partition.records_; + } + bool ready = true; + if (version >= 6 && version < 9) { + buffer >> ready; + } + if (version >= 8) { + buffer >> partition.filesystem_; + } + if (version >= 9) { + buffer >> partition.indexAlterSerial_; + } else { + partition.indexAlterSerial_ = ready ? 
partition.master_->getIndexAlterSerial() : 0; + } + if (version >= 17) { + buffer >> partition.columnAlterSerial_; + } else { + partition.columnAlterSerial_ = 0; + } + if (version >= 22 ) { + buffer >> partition.skippedColumnIds_; + } + + // Force partition reference count to 1. + partition.resetRef(1); + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const PersistentPartition& partition) { + buffer << partition.version_ << partition.serial_ << partition.dataSerial_ << partition.dataRecords_ + << partition.recordOffset_ << partition.period_ << partition.fileTime_ << partition.records_ + << partition.dataSize_ << partition.indexSize_ << partition.filesystem_ << partition.indexAlterSerial_ + << partition.columnAlterSerial_ << partition.skippedColumnIds_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ComparatorPersistent +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ComparatorPersistent { +private: + + const QueryInfo& queryInfo_; + const TableFields& fields_; + const RecordWrapper& recordWrapper_; + PartitionReader& reader_; + PartitionReader& stringReader_; + const KeyValue& key_; + KeyValue tempKey_; + +public: + + ComparatorPersistent(Context& context, const RecordWrapper& recordWrapper, PartitionReader& reader, PartitionReader& stringReader, + const KeyValue& key, uint8_t* buffer) + : queryInfo_(context.getQueryInfo()), fields_(context.getShare().getMappedFields()), + recordWrapper_(recordWrapper), reader_(reader), stringReader_(stringReader), key_(key), tempKey_(buffer, key.getMap()) { + } + + int compareTo(const uint32_t row) _THROW_(SparrowException) { + reader_.seekRecordData(row); + recordWrapper_.readUsingKeyPartMap(reader_, stringReader_, key_.getMap(), tempKey_.getKey(), true); + return queryInfo_.compareKeys(fields_, tempKey_, key_); + } +}; + +} + +#endif /* #ifndef _engine_persistent_h_ */ diff --git a/storage/sparrow/engine/purge.cc b/storage/sparrow/engine/purge.cc new file mode 100644 index 000000000000..9e1ec71b62ba --- /dev/null +++ b/storage/sparrow/engine/purge.cc @@ -0,0 +1,293 @@ +/* + Database automatic purge. +*/ + +#include "purge.h" +#include "internalapi.h" +#include "persistent.h" +#include "listener.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Purge +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Purge* Purge::purge_ = 0; + +const uint64_t Purge::securityMargin_ = static_cast(1024) * 1024 * 1024; + +// STATIC +void Purge::initialize() _THROW_(SparrowException) { + purge_ = new Purge(); + if (!purge_->start()) { + throw SparrowException::create(false, "Cannot start purge thread"); + } +} + +bool Purge::process() { + SPARROW_ENTER("Purge::process"); + if (!sema_.wait(1000, true)) { + const uint64_t now = Scheduler::now(); + if (now < last_ + 60000) { + return true; + } + } + + if (sparrow_disable_purge) { + last_ = Scheduler::now(); + return true; + } + + // Loop while purge is necessary. 
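+  // Each master decides in needToPurge() whether purging is required, comparing the
+  // current usage against the limit computed by getLimit() below:
+  // limit = (total used + free disk space) - sparrow_purge_security_margin per file system,
+  // optionally capped by sparrow_max_disk_size.
+  // Illustrative example (hypothetical numbers): with 900 GB used, 200 GB free, two file
+  // systems and a 1 GB margin each, the limit is 1100 GB - 2 GB = 1098 GB.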
+ DBUG_PRINT("sparrow_purge", ("Starting purge...")); + const Masters masters = InternalApi::getAll(); + const uint32_t nbMasters = masters.length(); + const Filesystems& filesystems = FileUtil::getFilesystems(false); + uint64_t initialFreeDiskSpace = ULLONG_MAX; + try { + uint64_t limit = 0; + uint64_t total = 0; + uint64_t totalNormalized = 0; + uint32_t i = 0; + uint64_t freeDiskSpace = 0; + bool purged = false; + for (;;) { + PersistentPartitions purgedPartitions; // To delete partitions outside lock. + for (uint32_t j = 0; j < nbMasters; ++j) { + if (i == 0 || (i % nbMasters) == 0) { + freeDiskSpace = FileUtil::getFreeDiskSpace(); + if (initialFreeDiskSpace == ULLONG_MAX) { + initialFreeDiskSpace = freeDiskSpace; + } + total = 0; + totalNormalized = 0; + for (uint32_t i = 0; i < nbMasters; ++i) { + const Master& master = *masters[i]; + ReadGuard guard(master.getLock()); + total += master.getDataSize() + master.getIndexSize(); + totalNormalized += master.getNormalizedSize(); + } + limit = getLimit(freeDiskSpace, total); + DBUG_PRINT("sparrow_purge", ("Free disk space %llu, total used %llu, total normalized %llu, limit %llu", static_cast(freeDiskSpace), + static_cast(total), static_cast(totalNormalized), static_cast(limit))); + Atomic::set64(&SparrowStatus::get().totalSize_, total); + } + Master& master = *masters[i % nbMasters]; + i++; + bool force = false; + PurgeMode mode = PurgeModeControlTaskPerDB::getMode(master.getDatabase()); + if (master.needToPurge(limit, total, totalNormalized, force, mode)) { + // Get the oldest partition of the table. + // It will be deleted when the partition guard goes out of scope. + DBUG_PRINT("sparrow_purge", ("Purged table %s.%s", master.getDatabase().c_str(), master.getTable().c_str())); + PersistentPartitions purgedPartitionsTable; + bool forced = master.purge(purgedPartitionsTable, force, mode); + if (!purgedPartitionsTable.isEmpty()) { + + // Logging if this purge was triggered because an IO operation failed because of the device is full. + if (logResult_) { + uint64_t totalSize = 0; + for (uint k=0; k< purgedPartitionsTable.entries(); ++k) { + totalSize += purgedPartitionsTable[k]->getDataSize() + purgedPartitionsTable[k]->getIndexSize(); + } + totalSize /= 1024*1024; + spw_print_information("Low free disk space, %lluMB. Purging %u partitions, forced %u, for a total size of %lluMB from table %s.%s", + static_cast(freeDiskSpace/(1024*1024)), purgedPartitions.entries(), force, static_cast(totalSize), master.getDatabase().c_str(), master.getTable().c_str()); + } + + // Logging if the partitions still contain valid data (forced purge because disk space is low) + if (forced && sparrow_log_purge_activity) { + uint64_t low = UINT64_MAX; + uint64_t high = 0; + uint64_t totalSize = 0; + for (uint k = 0; k < purgedPartitionsTable.entries(); ++k) { + const PersistentPartition* partition = purgedPartitionsTable[k]; + if (partition->getMin() < low) low = partition->getMin(); + if (partition->getMax() > high) high = partition->getMax(); + totalSize += partition->getDataSize() + partition->getIndexSize(); + } + totalSize /= 1024 * 1024; + const Str low_ts = Str::fromTimestamp(low); + const Str high_ts = Str::fromTimestamp(high); + spw_print_information("Low free disk space, %lluMB (total used is %lluMB while the limit is %lluMB). 
Forced to purge %u partitions from table %s.%s, for a total size of %lluMB and containing data from %s to %s.", + static_cast(freeDiskSpace/(1024*1024)), static_cast(total/(1024*1024)), static_cast(limit/(1024*1024)), + purgedPartitionsTable.entries(), master.getDatabase().c_str(), master.getTable().c_str(), static_cast(totalSize), low_ts.c_str(), high_ts.c_str()); + } + + purged = true; + break; + } + } + } + if (purgedPartitions.isEmpty()) { + // Nothing to purge. + break; + } + } + + // Check if we need to purge a specific file system. + for (;;) { + if (purged) { + freeDiskSpace = FileUtil::getFreeDiskSpace(); + total = 0; + totalNormalized = 0; + for (uint32_t i = 0; i < nbMasters; ++i) { + const Master& master = *masters[i]; + ReadGuard guard(master.getLock()); + total += master.getDataSize() + master.getIndexSize(); + totalNormalized += master.getNormalizedSize(); + } + limit = getLimit(freeDiskSpace, total); + Atomic::set64(&SparrowStatus::get().totalSize_, total); + } + const uint64_t fsMargin = Purge::getSecurityMargin(); + bool ok = true; + for (uint32_t i = 0; i < filesystems.length(); ++i) { + const uint64_t fsFree = filesystems[i]->getFree(); + if (nbMasters > 0 && fsFree <= fsMargin) { + // A file system is getting full. + // For each master file, get a list of partitions to delete to purge the given file system. + // As we cannot leave "holes" in partitions, we may have to delete partitions from other file systems. + // We choose the master file the closest to its normalized size with the lowest impact on the other + // file systems. + uint64_t minDelta = ULLONG_MAX; + uint32_t minOther = UINT_MAX; + Master* chosenMaster = 0; + PersistentPartitions partitions; + for (uint32_t j = 0; j < nbMasters; ++j) { + Master& master = *masters[j]; + WriteGuard guard(master.getLock()); + PersistentPartitions tmp; + const uint64_t delta = master.listPartitionsForFilesystem(i, tmp, limit, totalNormalized); + const uint32_t n = tmp.length(); + if (n != 0 && n <= minOther) { + if (n < minOther || delta < minDelta) { + chosenMaster = &master; + partitions = tmp; + minDelta = delta; + minOther = n; + } + } + } + if (chosenMaster != 0) { + WriteGuard guard(chosenMaster->getLock()); + if (chosenMaster->purgePartitionsForFilesystem(partitions)) { + if (logResult_) { + uint64_t totalSize = 0; + for (uint k=0; kgetDataSize() + partitions[k]->getIndexSize(); + } + totalSize /= 1024*1024; + spw_print_information("Low free disk space on file system %s: %llu. Purging %u partitions for a total size of %lluMB from table %s.%s", + filesystems[i]->getPath().c_str(), static_cast(fsFree/(1024*1024)), + partitions.entries(), static_cast(totalSize), chosenMaster->getDatabase().c_str(), chosenMaster->getTable().c_str()); + } + purged = true; + } + } + ok = false; + break; + } + } + if (ok) { + break; + } + } + + if (logResult_) { + spw_print_information("Purge result: free disk space changed from %lluMB to %lluMB.", static_cast(initialFreeDiskSpace/(1024*1024)), + static_cast(freeDiskSpace/(1024*1024))); + } + } catch(const SparrowException& e) { + e.toLog(); + } + DBUG_PRINT("sparrow_purge", ("Purge completed")); + last_ = Scheduler::now(); + logResult_ = false; + return true; +} + +// STATIC +uint64_t Purge::getLimit(const uint64_t freeDiskSpace, const uint64_t total) { + // Keep a security margin for each file system. + const uint64_t margin = Purge::getSecurityMargin() * FileUtil::getFilesystems(false).length(); + uint64_t limit = total + freeDiskSpace; + limit = limit > margin ? 
limit - margin : 0; + if (sparrow_max_disk_size != 0 && limit > sparrow_max_disk_size) { + limit = sparrow_max_disk_size; + } + return limit; +} + +bool Purge::notifyStop() { + Purge::wakeUp(); + return true; +} + +// STATIC +void Purge::wakeUp(bool logResult) { + purge_->logResult_ = logResult; + purge_->sema_.post(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PurgeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// To purge zombie directories, every day. +void PurgeTask::run(const uint64_t timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("PurgeTask::run"); + DBUG_PRINT("sparrow_purge", ("Cleaning up zombie data directories")); + const Masters masters = InternalApi::getAll(); + const uint32_t nbMasters = masters.length(); + const Filesystems& filesystems = FileUtil::getFilesystems(true); + for (uint32_t i = 0; i < nbMasters; ++i) { + Master& master = *masters[i]; + uint64_t oldest; + { + ReadGuard guard(master.getLock()); + oldest = master.getOldest(true); + } + struct tm t; + if (oldest != 0) { + oldest -= oldest % 86400000; + const time_t tt = static_cast(oldest / 1000); + if (gmtime_r(&tt, &t) == 0) { + continue; + } + } + const uint32_t limit = oldest == 0 ? UINT_MAX : ((t.tm_year + 1900) * 10000 + (t.tm_mon + 1) * 100 + t.tm_mday); + for (uint32_t j = 0; j < filesystems.length(); ++j) { + try { + // Scan data directory and remove days older than oldest. + char path[FN_REFLEN]; + Files files; + FileUtil::scanDirectory(master.getDataDirectory(j, path), "", 1, files, false); + SYSslistIterator iterator(files); + while (++iterator) { + const Str& file = iterator.key(); + const char* dirname = file.c_str(); + const size_t l = strlen(dirname); + if (l < 8) { + continue; + } + dirname += l - 8; + uint32_t d; + if (sscanf(dirname, "%u", &d) == 1 && d < limit) { +#ifndef NDEBUG + const Str soldest = oldest == 0 ? Str("N/A") : Str::fromTimestamp(oldest); + DBUG_PRINT("sparrow_purge", ("Table %s.%s: removing directory %s because it is older than %s", + master.getDatabase().c_str(), master.getTable().c_str(), file.c_str(), soldest.c_str())); +#endif + FileUtil::deleteDirectory(file.c_str()); + } + } + } catch(const SparrowException& e) { + // Ignore error. + } + } + } +} + +} diff --git a/storage/sparrow/engine/purge.h b/storage/sparrow/engine/purge.h new file mode 100644 index 000000000000..034d88f15731 --- /dev/null +++ b/storage/sparrow/engine/purge.h @@ -0,0 +1,98 @@ +/* + Database automatic purge. +*/ + +#ifndef _engine_purge_h_ +#define _engine_purge_h_ + +#include "../handler/plugin.h" // For configuration parameters. 
+#include "thread.h" +#include "sema.h" +#include "master.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Purge +////////////////////////////////////////////////////////////////////////////////////////////////////// + +enum PurgeMode { + PURGE_MODE_ON_INSERTION, + PURGE_MODE_CONSTANTLY +}; + +class Purge : public Thread { +private: + + static Purge* purge_; + + Sema sema_; + + uint64_t last_; + + bool logResult_; + + static const uint64_t securityMargin_; + +protected: + + bool process() override; + + bool notifyStop() override; + + bool deleteAfterExit() override { + return false; + } + +public: + + Purge() : Thread("Purge::purge_"), sema_("Purge::sema_", 0), last_(ULLONG_MAX / 2), logResult_(false) { + } + + ~Purge() { + } + + static void initialize() _THROW_(SparrowException); + + static void shutdown() { + if ( purge_ != 0 ) + purge_->stop(); + } + + static void wakeUp(bool logResult = false); + + static uint64_t getSecurityMargin() { + return sparrow_purge_security_margin; + } + + static uint64_t getLimit(const uint64_t freeDiskSpace, const uint64_t total); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PurgeTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PurgeTask : public Task { +public: + + PurgeTask() : Task(Worker::getQueue()) { + } + + virtual bool operator == (const PurgeTask& right) const { + return true; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 86400000L; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException); +}; + +} + +#endif /* #ifndef _engine_purge_h_ */ diff --git a/storage/sparrow/engine/queue.h b/storage/sparrow/engine/queue.h new file mode 100644 index 000000000000..b4afe3dc7f9d --- /dev/null +++ b/storage/sparrow/engine/queue.h @@ -0,0 +1,158 @@ +/* + Message queue. +*/ + +#ifndef _engine_queue_h_ +#define _engine_queue_h_ + +#include "types.h" +#include "list.h" +#include "cond.h" +#include "vec.h" +#include "misc.h" + +#ifdef _WIN32 +#pragma warning(disable:4355) +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Queue +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class MessageThread; +template class Queue : public Lock { + friend class MessageThread; + +private: + + SYSpSlist list_; + const bool bulk_; // If true, all messages in the queue are returned by wait(). + Cond notFull_; + volatile bool stopped_; + const uint32_t capacity_; + + // LIFO stack of idle threads. This enables unecessary idle threads to time out. 
+ SYSpVector, 16> idleThreads_; + +protected: + + virtual void needMoreThreads(const uint32_t count) { + } + + virtual void threadTimedOut(MessageThread* thread) { + } + +public: + + Queue(const char* name, const bool bulk) : Lock(false, name), bulk_(bulk), notFull_(false, *this, (Str(name) + Str("::notFull_")).c_str()), + stopped_(false), capacity_(0) { + } + + Queue(const char* name, const uint32_t capacity, const bool bulk) : Lock(false, name), bulk_(bulk), notFull_(false, *this, (Str(name) + Str("::notFull_")).c_str()), + stopped_(false), capacity_(capacity) { + } + + virtual ~Queue() { + } + + uint32_t getSize() { + Guard guard(*this); + return list_.entries(); + } + + void send(M* message) { + if (stopped_) { + delete message; + return; + } + Guard guard(*this); + while (capacity_ != 0 && list_.entries() == capacity_) { + notFull_.wait(true); + } + list_.append(message); + if (idleThreads_.isEmpty()) { + needMoreThreads(bulk_ ? 1 : list_.entries()); + } else { + idleThreads_.last()->getCond().signal(true); + } + } + + void send(SYSpSlist& list) { + if (list.isEmpty()) { + return; + } + if (stopped_) { + list.clearAndDestroy(); + return; + } + Guard guard(*this); + if (capacity_ == 0) { + list_.appendAll(list); + if (bulk_) { + if (idleThreads_.isEmpty()) { + needMoreThreads(1); + } + } else if (idleThreads_.length() < list_.entries()) { + needMoreThreads(list_.entries() - idleThreads_.length()); + } + } else { + SYSpSlistIterator iterator(list); + while (++iterator) { + while (list_.entries() == capacity_) { + notFull_.wait(true); + } + list_.append(iterator.key()); + } + } + if (!idleThreads_.isEmpty()) { + idleThreads_.last()->getCond().signal(true); + } + } + + bool wait(MessageThread* thread, const uint64_t milliseconds, SYSpSlist& list) { + Guard guard(*this); + if (stopped_) { + return false; + } + bool idle = false; + while (list_.isEmpty()) { + if (!idle) { + idleThreads_.append(thread); + idle = true; + } + if (!thread->getCond().wait(milliseconds, true) || stopped_) { + idleThreads_.remove(thread); + return false; + } + } + if (idle) { + idleThreads_.remove(thread); + } + if (bulk_) { + list_.getAll(list); + } else { + list.append(list_.removeAt(0)); + } + notFull_.signal(true); + return true; + } + + void signal() { + Guard guard(*this); + stopped_ = true; + for (uint32_t i = 0; i < idleThreads_.length(); ++i) { + idleThreads_[i]->getCond().signal(true); + } + list_.clearAndDestroy(); + } + + bool isStopping() const { + return stopped_; + } +}; + +} + +#endif /* #ifndef _engine_queue_h_ */ diff --git a/storage/sparrow/engine/scheduler.cc b/storage/sparrow/engine/scheduler.cc new file mode 100644 index 000000000000..82ccac448aae --- /dev/null +++ b/storage/sparrow/engine/scheduler.cc @@ -0,0 +1,149 @@ +/* + Scheduler thread. 
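+
+  Tasks are grouped into per-timestamp buckets (see the Tasks class in scheduler.h);
+  process() wakes up when signalled or at most every 5 seconds, sends each task of the
+  earliest due bucket to that task's worker queue as a TaskJob, and sleeps until the next
+  bucket is due.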
+*/ + +#include "scheduler.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Scheduler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Scheduler* Scheduler::scheduler_ = 0; + +// STATIC +void Scheduler::initialize() _THROW_(SparrowException) { + scheduler_ = new Scheduler(); + if (!scheduler_->start()) { + throw SparrowException::create(false, "Cannot start scheduler thread"); + } +} + +bool Scheduler::process() { + Guard guard(cond_.getLock()); + SYSpHashIterator iterator(tasks_); + uint64_t minTimestamp = ULLONG_MAX; + while (++iterator) { + minTimestamp = std::min(iterator.key()->getTimestamp(), minTimestamp); + } + const uint64_t now = Scheduler::now(); + if (now >= minTimestamp) { + const Tasks key(minTimestamp); + Tasks* tasks = tasks_.remove(&key); + while (!tasks->isEmpty()) { + Task* task = tasks->removeFirst(); + task->getQueue().send(new TaskJob(minTimestamp, task)); + } + delete tasks; + } else { + const uint64_t sleepDuration = std::min(static_cast(5000), minTimestamp - now); + cond_.wait(sleepDuration, true); + } + return true; +} + +bool Scheduler::notifyStop() { + cond_.signalAll(); + return true; +} + +// STATIC +void Scheduler::addTask(Task* task) { + addTask(task, Scheduler::now(), false); +} + +// STATIC +void Scheduler::addTask(Task* task, const uint64_t timestamp, const bool onlyOne /* = false */) { + Guard guard(scheduler_->cond_.getLock()); + addTaskNoLock(task, timestamp, onlyOne); +} + +// STATIC +bool Scheduler::moveTask(Task* task, const uint64_t timestamp) { + Guard guard(scheduler_->cond_.getLock()); + if (!removeTaskNoLock(task)) { + return false; + } + addTaskNoLock(task, timestamp, false); + return true; +} + +// To stop a task, search it in the scheduler queue. If present, re-schedule it asap to +// have the task deleted the normal way. 
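+// Re-scheduling at timestamp 0 makes the worker pick the task up immediately; because
+// Task::stop() has already set the stopping flag, TaskJob::process() then deletes the
+// task instead of running it.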
+// STATIC +void Scheduler::stopTask(Task* task) { + Guard guard(scheduler_->cond_.getLock()); + if (removeTaskNoLock(task)) { + addTaskNoLock(task, 0, false, true); + } +} + +// STATIC +bool Scheduler::removeTaskNoLock(Task* task) { + bool found = false; + SYSpHashIterator iterator(scheduler_->tasks_); + while (++iterator) { + Tasks* tasks = iterator.key(); + if (tasks->remove(task)) { + if (tasks->isEmpty()) { + delete scheduler_->tasks_.remove(tasks); + } + found = true; + iterator.reset(); + } + } + return found; +} + +// STATIC +void Scheduler::addTaskNoLock(Task* task, const uint64_t timestamp, const bool remove, const bool first) { + if (remove) { + removeTaskNoLock(task); + } + const Tasks key(timestamp); + Tasks* tasks = scheduler_->tasks_.find(&key); + if (tasks == 0) { + tasks = new Tasks(timestamp); + scheduler_->tasks_.insert(tasks); + } + tasks->add(task, first); + scheduler_->cond_.signalAll(true); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Task +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void Task::stop() { + stopping_ = true; + Scheduler::stopTask(this); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TaskJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void TaskJob::process() { + if (task_->isStopping()) { + delete task_; + } else { + try { + task_->run(timestamp_); + } catch(const SparrowException& e) { + e.toLog(); + } + const uint64_t period = task_->getPeriod(); + if (period == 0) { + delete task_; + } else { + uint64_t timestamp = timestamp_ + period; + while (timestamp < Scheduler::now()) { + timestamp += period; + } + Scheduler::addTask(task_, timestamp, false); + } + } +} + +} diff --git a/storage/sparrow/engine/scheduler.h b/storage/sparrow/engine/scheduler.h new file mode 100644 index 000000000000..b8a6e2878a9d --- /dev/null +++ b/storage/sparrow/engine/scheduler.h @@ -0,0 +1,216 @@ +/* + Scheduler thread. +*/ + +#ifndef _engine_scheduler_h_ +#define _engine_scheduler_h_ + +#include "exception.h" +#include "thread.h" +#include "hash.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Task +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Task : public SYSidlink { +private: + + Queue& queue_; + volatile bool stopping_; + +public: + + Task(Queue& queue) : queue_(queue), stopping_(false) { + } + + virtual ~Task() { + } + + // Task period, in milliseconds. If zero, the task is not scheduled again after its first run. + virtual uint64_t getPeriod() const = 0; + + // Task execution method. + virtual void run(const uint64_t timestamp) _THROW_(SparrowException) = 0; + + // To stop a task. 
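+  // Sets the stopping flag and asks the Scheduler to re-schedule the task immediately,
+  // so that its worker queue deletes it on the next dispatch (see TaskJob::process()).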
+ void stop(); + + bool isStopping() const { + return stopping_ || queue_.isStopping(); + } + + virtual bool operator == (const Task& right) const = 0; + + Queue& getQueue() { + return queue_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TaskJob +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TaskJob : public Job { +private: + + uint64_t timestamp_; + Task* task_; + +public: + + TaskJob(const uint64_t timestamp, Task* task) : timestamp_(timestamp), task_(task) { + } + + void process() override; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Tasks +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Tasks { +private: + + uint64_t timestamp_; // Timestamp in milliseconds. + SYSidlist tasks_; // List of tasks for this timestamp. + +private: + + Tasks& operator = (const Tasks& right); + +public: + + Tasks() : timestamp_(0) { + } + + Tasks(const uint64_t timestamp) : timestamp_(timestamp) { + } + + uint32_t hash() const { + return 31 + static_cast(timestamp_ ^ (timestamp_ >> 32)); + } + + uint64_t getTimestamp() const { + return timestamp_; + } + + void add(Task* task, const bool first) { + if ( first ) { + tasks_.prepend(task); + } else { + tasks_.append(task); + } + } + + bool remove(Task* task) { + if (tasks_.contains(task)) { + tasks_.remove(task); + return true; + } else { + return false; + } + } + + Task* removeFirst() { + return tasks_.removeFirst(); + } + + bool isEmpty() const { + return tasks_.isEmpty(); + } + + bool operator == (const Tasks& right) const { + return timestamp_ == right.timestamp_; + } + + bool contains(const Task& task) { + return tasks_.contains( task ); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Scheduler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Scheduler : public Thread { + friend class TaskJob; + +private: + + static Scheduler* scheduler_; + + SYSpHash tasks_; + Cond cond_; + const uint64_t start_; + +private: + + static bool removeTaskNoLock(Task* task); + + static void addTaskNoLock(Task* task, const uint64_t timestamp, const bool remove, const bool first=false); + +protected: + + bool process() override; + + bool notifyStop() override; + + bool deleteAfterExit() override { + return false; + } + +public: + + Scheduler() : Thread("Scheduler::scheduler_"), tasks_(16), cond_(false, "Scheduler::cond_"), start_(now()) { + } + + ~Scheduler() { + } + + static void addTask(Task* task, const uint64_t timestamp, const bool remove = false); + + static void addTask(Task* task); + + static void stopTask(Task* task); + + static bool moveTask(Task* task, const uint64_t timestamp); + + static void initialize() _THROW_(SparrowException); + + static void shutdown() { + if ( scheduler_ != NULL ) + scheduler_->stop(); + } + + // Current timestamp, in milliseconds. + static uint64_t now() { +#if defined(_WIN32) + FILETIME ft; + GetSystemTimeAsFileTime(&ft); + const uint64_t t = (static_cast(ft.dwHighDateTime) << 32) + ft.dwLowDateTime; + return (t - 116444736000000000ULL) / 10000; +#else + struct timeval tv; + gettimeofday(&tv, 0); + return (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec) / 1000; +#endif + } + + // Server uptime, in milliseconds. 
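+  // Milliseconds elapsed since the Scheduler singleton was created at engine start-up;
+  // returns 0 if the scheduler is not running.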
+ static uint64_t uptime() { + if (scheduler_ == 0) { + return 0; + } else { + const uint64_t s = scheduler_->start_; + const uint64_t t = now(); + return t > s ? t - s : 0; + } + } +}; + +} + +#endif /* #ifndef _engine_scheduler_h_ */ diff --git a/storage/sparrow/engine/search.h b/storage/sparrow/engine/search.h new file mode 100644 index 000000000000..06c72de956d3 --- /dev/null +++ b/storage/sparrow/engine/search.h @@ -0,0 +1,140 @@ +/* + Search helpers. +*/ + +#ifndef _engine_search_h_ +#define _engine_search_h_ + +#include "partition.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BinarySearch +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class BinarySearch { +public: + + static uint32_t find(C& comparator, const uint32_t start, const uint32_t n, const SearchFlag searchFlag); +}; + +// Performs a binary search to find the record matching the given context's comparator and search searchFlag. +// STATIC +template uint32_t BinarySearch::find(C& comparator, const uint32_t start, const uint32_t n, const SearchFlag searchFlag) { + if (n == 0) { + return UINT_MAX; + } + + bool doSearch = true; + bool found = false; + + // Check lower record. + const uint32_t end = start + n - 1; + uint32_t row = start; + int cmp = comparator.compareTo(row); + if (cmp == 0) { + doSearch = false; + found = true; + } else if (cmp > 0) { // First record greater than key: skip search. + doSearch = false; + } else { + // Check upper record. + row = end; + cmp = comparator.compareTo(row); + if (cmp < 0) { // Last record smaller than key: skip search. + doSearch = false; + } + } + + if (doSearch) { + uint32_t bottom = start; + uint32_t top = end; + while (top > bottom) { + // Compare record and key. + row = (top + bottom) >> 1; + cmp = comparator.compareTo(row); + if (cmp == 0) { + found = true; + break; + } + else if (cmp > 0) { + top = row > start ? row - 1 : start; + } + else { + bottom = row + 1; + } + } + if (!found) { + row = bottom; + if (comparator.compareTo(row) == 0) { + found = true; + } + } + } + if (found) { + // Record found. + if (searchFlag == SearchFlag::EQ || searchFlag == SearchFlag::GE + || searchFlag == SearchFlag::LE) { + // Go down to the first one. + while (row > start && comparator.compareTo(row - 1) == 0) { + row--; + } + } else if (searchFlag == SearchFlag::LE_LAST) { + // Go up to the last one. + while (row + 1 < end && comparator.compareTo(row + 1) == 0) { + row++; + } + } else if (searchFlag == SearchFlag::LT) { + // Go down to the previous one, if any. + if (row == start) { + return UINT_MAX; + } + row--; + while (comparator.compareTo(row) == 0) { + if (row-- == start) { + return UINT_MAX; + } + } + } else if (searchFlag == SearchFlag::GT) { + // Go up to the next one, if any. + if (row == end) { + return UINT_MAX; + } + ++row; + while (comparator.compareTo(row) == 0) { + if (++row > end) { + return UINT_MAX; + } + } + } + } else { + // Record not found. Row is set on the previous record, if any. + if (searchFlag == SearchFlag::EQ) { + return UINT_MAX; + } else if (searchFlag == SearchFlag::LE || searchFlag == SearchFlag::LE_LAST + || searchFlag == SearchFlag::LT) { + // Go up to the largest previous one, if any. 
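+      // Illustrative example (hypothetical values): searching key 5 with LE over rows
+      // holding 1, 3, 3, 7 stops the loop on the row holding 7; row-- then returns the
+      // last row holding 3, i.e. the largest entry below the key.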
+ while (row <= end && comparator.compareTo(row) < 0) { + row++; + } + if (row == start) { + return UINT_MAX; + } + row--; + } else if (searchFlag == SearchFlag::GE || searchFlag == SearchFlag::GT) { + // Go up to the smallest next one, if any. + while (row <= end && comparator.compareTo(row) < 0) { + row++; + } + if (row == end + 1) { + return UINT_MAX; + } + } + } + return row; +} + +} + +#endif /* #ifndef _engine_search_h_ */ diff --git a/storage/sparrow/engine/sema.h b/storage/sparrow/engine/sema.h new file mode 100644 index 000000000000..b199c37453c2 --- /dev/null +++ b/storage/sparrow/engine/sema.h @@ -0,0 +1,58 @@ +/* + Semaphore. +*/ + +#ifndef _engine_sema_h_ +#define _engine_sema_h_ + +#include "cond.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sema +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Sema { +private: + + Cond cond_; + uint32_t volatile count_; + +public: + + Sema(const char* name, const uint32_t count = 0) : cond_(false, (Str(name) + Str("::cond_")).c_str()), count_(count) { + } + + ~Sema() { + } + + void post(const uint32_t count = 1) { + Guard guard(cond_.getLock()); + count_ += count; + cond_.signalAll(true); + } + + bool wait(const uint64_t milliseconds, const bool all = false) { + Guard guard(cond_.getLock()); + while (count_ == 0) { + if (!cond_.wait(milliseconds, true)) { + return false; + } + } + if (all) { + count_ = 0; + } else { + count_--; + } + return true; + } + + bool wait(const bool all = false) { + return wait(0, all); + } +}; + +} + +#endif /* #ifndef _engine_sema_h_ */ diff --git a/storage/sparrow/engine/serial.cc b/storage/sparrow/engine/serial.cc new file mode 100644 index 000000000000..92bf6d7ed975 --- /dev/null +++ b/storage/sparrow/engine/serial.cc @@ -0,0 +1,240 @@ +/* + Serialization. +*/ + +#include "types.h" +#include "serial.h" +#include "listener.h" +#include "../handler/plugin.h" // For configuration parameters. +#include "io.h" +#include "../engine/log.h" +#include +#include +#ifdef _WIN32 +#pragma warning(disable:4355) +#else +#include +#endif + +#include "sql/mysqld.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN32 +volatile bool ByteBuffer::lockError_ = false; +bool ByteBuffer::canLock_ = false; +#endif + +// Initialize memory locking if --memlock option is enabled. +// STATIC +void ByteBuffer::initialize() { + SPARROW_ENTER("ByteBuffer::initialize"); + if (!locked_in_memory) { + spw_print_information("Sparrow is not using memory locked pages because memlock option is not enabled"); + return; + } + [[maybe_unused]] const char* warning = "Sparrow cannot use memory locked pages: %s"; +#ifdef _WIN32 + try { + // Set max working set size to the largest possible value. 
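+    // Starts just below the physical memory size and retries in 512 MB steps until
+    // SetProcessWorkingSetSize() accepts the value; gives up (and throws) at 512 MB.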
+ MEMORYSTATUSEX memStat; + memStat.dwLength = sizeof (memStat); + if (!GlobalMemoryStatusEx(&memStat)) { + throw SparrowException::create(true, "Unable to get memory status"); + } + uint64_t mb = static_cast(memStat.ullTotalPhys / 1024 / 1024); + HANDLE h = GetCurrentProcess(); + while (true) { + mb -= 512; + if (SetProcessWorkingSetSize(h, static_cast(mb * 1024 * 1024), static_cast(mb * 1024 * 1024))) { +#ifndef NDEBUG + SIZE_T minWorkingSet; + SIZE_T maxWorkingSet; + GetProcessWorkingSetSize(h, &minWorkingSet, &maxWorkingSet); + DBUG_PRINT("sparrow_memory", ("Set process WS size; min=%u KB, max=%u KB", static_cast(minWorkingSet / 1024), + static_cast(maxWorkingSet / 1024))); +#endif + break; + } else if (mb <= 512) { + throw SparrowException::create(true, "Unable to set working set size"); + } + } + } catch(const SparrowException& e) { + spw_print_warning(warning, e.getText()); + return; + } + HANDLE hToken; + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) { + return; + } + const TCHAR* privName = TEXT("SeLockMemoryPrivilege"); + TOKEN_PRIVILEGES tp; + if (!LookupPrivilegeValue(0, privName, &tp.Privileges[0].Luid)) { + return; + } + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + const bool status = AdjustTokenPrivileges(hToken, false, &tp, 0, 0, 0); + + // It is possible for AdjustTokenPrivileges to return true and still not succeed. + // So always check for the last error value. + if (!status || GetLastError() != ERROR_SUCCESS) { + SparrowException e = SparrowException::create(true, "Unable to set privilege SeLockMemoryPrivilege"); + spw_print_warning(warning, e.getText()); + return; + } + if (!CloseHandle(hToken)) { + return; + } + canLock_ = true; +#else + if (::mlockall(MCL_CURRENT | MCL_FUTURE) != 0) { + [[maybe_unused]] SparrowException e = SparrowException::create(true, "mlockall error"); + spw_print_warning(warning, e.getText()); + return; + } +#endif + spw_print_information("Sparrow is using memory locked pages"); +} + +// ByteBuffers require page-aligned memory to perform e.g. direct I/O. +// STATIC +uint8_t* ByteBuffer::mmap(const uint32_t size) { +#ifdef _WIN32 + uint8_t* buffer = static_cast(VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); + + // Lock pages in memory if possible. 
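+  // Needed on Windows only: on POSIX the mlockall(MCL_CURRENT | MCL_FUTURE) call in
+  // initialize() already covers future mappings, so no per-buffer locking is done there.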
+ if (canLock_ && buffer != 0) { + try { + if (!VirtualLock(buffer, size)) { + throw SparrowException::create(true, "VirtualLock error"); + } + } catch(const SparrowException& e) { + if (!lockError_) { + lockError_ = true; + spw_print_warning("Sparrow: Memory locking failed: %s", e.getText()); + } + } + } +#else + uint8_t* buffer = reinterpret_cast(::mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0)); + if (buffer == MAP_FAILED) { + buffer = 0; + } +#endif + if (buffer == 0) { + SparrowException e = SparrowException::create(true, "Cannot allocate %u bytes", size); + e.toLog(); + exit(1); + } + return buffer; +} + +// STATIC +bool ByteBuffer::munmap(uint8_t* buffer, const uint32_t size) { +#ifdef _WIN32 + const bool result = VirtualFree(buffer, 0, MEM_RELEASE); +#else + const bool result = ::munmap(reinterpret_cast(buffer), size) == 0; +#endif + if (!result) { + SparrowException e = SparrowException::create(true, "Cannot free %u bytes at address %p", size, buffer); + e.toLog(); + my_abort(); + } + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PrintBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +PrintBuffer::PrintBuffer() : ByteBuffer(new uint8_t[1024], 1024, this) { +} + +PrintBuffer& operator << (PrintBuffer& buffer, const char* v) { + static_cast(buffer) << v; + return buffer; +} + +PrintBuffer& operator << (PrintBuffer& buffer, const Str& v) { + return buffer << v.c_str(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SocketReader::SocketReader(Connection& connection, ByteBuffer& buffer) _THROW_(SparrowException) + : ByteBuffer(buffer, this), connection_(connection), bytesToRead_(static_cast(buffer.limit())), bytesRead_(0) { + overflow(); +} + +void SocketReader::overflow() _THROW_(SparrowException) { + const int length = static_cast(bytesToRead_) - bytesRead_; + if (length == 0) { + return; + } else if (length < 0) { + throw SparrowException("Reached end of stream", false); + } + int received = 0; + if (!connection_.isClosed()) { + received = recv(connection_.getSocket(), reinterpret_cast(getData() + bytesRead_), length, 0); +#ifdef _WIN32 + if (GetLastError() == WSAECONNRESET) { + // Treat connection reset as a normal close. 
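+      // received = 0 routes this case to the "Connection closed" exception below rather
+      // than reporting a socket error.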
+ received = 0; + } +#endif + } + if (received == -1) { + throw SparrowException::create(true, "Error while reading data from socket"); + } else if (received == 0) { + throw SparrowException("Connection closed", false, 0); + } + bytesRead_ += received; + limit(bytesRead_); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SocketWriter::SocketWriter(Connection& connection) + : ByteBuffer(IOContext::getBuffer(sparrow_transfer_block_size), this), connection_(connection) { +} + +SocketWriter::~SocketWriter() { + flush(); +} + +void SocketWriter::flush() _THROW_(SparrowException) { + uint32_t length = 0; + while (length < position()) { + bool closed = false; + if (connection_.isClosed()) { + closed = true; + } else { + const int sent = send(connection_.getSocket(), reinterpret_cast(getData() + length), static_cast(position() - length), 0); + if (sent == -1) { +#ifdef _WIN32 + if (GetLastError() == WSAECONNRESET) { + closed = true; + } else +#endif + throw SparrowException::create(true, "Error while writing data to socket"); + } + length += sent; + } + if (closed) { + throw SparrowException("Connection closed", false, 0); + } + } +} + +} + diff --git a/storage/sparrow/engine/serial.h b/storage/sparrow/engine/serial.h new file mode 100644 index 000000000000..646c7d4162f9 --- /dev/null +++ b/storage/sparrow/engine/serial.h @@ -0,0 +1,724 @@ +/* + Serialization. +*/ + +#ifndef _engine_serial_h_ +#define _engine_serial_h_ + +#include "exception.h" +#include "interval.h" +#include "vec.h" +#include "lock.h" +#include "hash.h" + +// Use little endian to get better performances on x86 and x64 (around 15% faster). +// If you change endianness, make sure the Java side is updated too (see SparrowBuffer.getByteOrder()). +// Of course, changing endianness breaks existing files. +#define SPARROW_LITTLE_ENDIAN 1 + +// Marshalling/unmarshalling macros. SLOW macros are used when memory crosses the buffer limit, +// FAST macros are used otherwise. + +// Macros for little endian ordering. 
+//----------------------------------- + +#define SLOW_STORE2_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); } while(0) +#define SLOW_STORE4_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 24)); } while(0) +#define SLOW_STORE8_L(V) do { put((uint8_t)(V)); put((uint8_t)((V) >> 8)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 24)); put((uint8_t)((V) >> 32)); \ + put((uint8_t)((V) >> 40)); put((uint8_t)((V) >> 48)); put((uint8_t)((V) >> 56)); } while(0) +#define SLOW_LOAD2_L(V) do { (V) = get(); V |= (get() << 8); } while(0) +#define SLOW_LOAD4_L(V) do { (V) = get(); V |= (get() << 8); V |= (get() << 16); V |= (get() << 24); } while(0) +#define SLOW_LOAD8_L(V) do { (V) = (uint64_t)get(); V |= ((uint64_t)get() << 8); V |= ((uint64_t)get() << 16); V |= ((uint64_t)get() << 24); \ + V |= ((uint64_t)get() << 32); V |= ((uint64_t)get() << 40); V |= ((uint64_t)get() << 48); V |= ((uint64_t)get() << 56); } while(0) +#ifdef WORDS_BIGENDIAN // Macros for OS that use Big Endian +#define FAST_STORE2_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); } while(0) +#define FAST_STORE4_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); \ + *((P) + 2) = (uint8_t)((V) >> 16); *((P) + 3) = (uint8_t)((V) >> 24); } while(0) +#define FAST_STORE8_L(P,V) do { *(P) = (uint8_t)(V); *((P) + 1) = (uint8_t)((V) >> 8); \ + *((P) + 2) = (uint8_t)((V) >> 16); *((P) + 3) = (uint8_t)((V) >> 24); *((P) + 4) = (uint8_t)((V) >> 32); \ + *((P) + 5) = (uint8_t)((V) >> 40); *((P) + 6) = (uint8_t)((V) >> 48); *((P) + 7) = (uint8_t)((V) >> 56); } while(0) +#define FAST_LOAD2_L(P, V) do { uint16_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 1); *(ptemp + 1) = *(P); (V) = vtemp; } while(0) +#define FAST_LOAD4_L(P, V) do { uint32_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 3); *(ptemp + 1) = *((P) + 2); *(ptemp + 2) = *((P) + 1); *(ptemp + 3) = *(P); \ + (V) = vtemp; } while(0) +#define FAST_LOAD8_L(P, V) do { uint64_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 7); *(ptemp + 1) = *((P) + 6); *(ptemp + 2) = *((P) + 5); *(ptemp + 3) = *((P) + 4); \ + *(ptemp + 4) = *((P) + 3); *(ptemp + 5) = *((P) + 2); *(ptemp + 6) = *((P) + 1); *(ptemp + 7) = *(P); \ + (V) = vtemp; } while(0) +#else // Macros for OS that use Little Endian +#define FAST_STORE2_L(P,V) do { *((uint16_t*)(P)) = (uint16_t)(V); } while(0) +#define FAST_STORE4_L(P,V) do { *((uint32_t*)(P)) = (uint32_t)(V); } while(0) +#define FAST_STORE8_L(P,V) do { *((uint64_t*)(P)) = (uint64_t)(V); } while(0) +#define FAST_LOAD2_L(P,V) do { V = *((uint16_t*)(P)); } while(0) +#define FAST_LOAD4_L(P,V) do { V = *((uint32_t*)(P)); } while(0) +#define FAST_LOAD8_L(P,V) do { V = *((uint64_t*)(P)); } while(0) +#endif + +// Macros for big endian ordering. 
+//-------------------------------- + +#define SLOW_STORE2_B(V) do { put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_STORE4_B(V) do { put((uint8_t)((V) >> 24)); put((uint8_t)((V) >> 16)); \ + put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_STORE8_B(V) do { put((uint8_t)((V) >> 56)); put((uint8_t)((V) >> 48)); \ + put((uint8_t)((V) >> 40)); put((uint8_t)((V) >> 32)); put((uint8_t)((V) >> 24)); \ + put((uint8_t)((V) >> 16)); put((uint8_t)((V) >> 8)); put((uint8_t)(V)); } while(0) +#define SLOW_LOAD2_B(V) do { (V) = (get() << 8); V |= get(); } while(0) +#define SLOW_LOAD4_B(V) do { (V) = (get() << 24); V |= (get() << 16); V |= (get() << 8); V |= get(); } while(0) +#define SLOW_LOAD8_B(V) do { (V) = ((uint64_t)get() << 56); V |= ((uint64_t)get() << 48); V |= ((uint64_t)get() << 40); \ + V |= ((uint64_t)get() << 32); V |= ((uint64_t)get() << 24); V |= ((uint64_t)get() << 16); V |= ((uint64_t)get() << 8); V |= (uint64_t)get(); } while(0) +#ifdef WORDS_BIGENDIAN // Macros for OS that use Big Endian +#define FAST_STORE2_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 2); } while(0) +#define FAST_STORE3_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 3); } while(0) +#define FAST_STORE4_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 4); } while(0) +#define FAST_STORE8_B(P,V) do { memcpy((uint8_t*)(P), (uint8_t*)(&V), 8); } while(0) +#define FAST_LOAD2_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 2); } while(0) +#define FAST_LOAD3_B(P, V) do { (V) = 0; memcpy((uint8_t*)(&V), (uint8_t*)(P), 3); } while(0) +#define FAST_LOAD4_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 4); } while(0) +#define FAST_LOAD8_B(P, V) do { memcpy((uint8_t*)(&V), (uint8_t*)(P), 8); } while(0) +#else // Macros for OS that use Little Endian +#define FAST_STORE2_B(P,V) do { *(P) = (uint8_t)((V) >> 8); *((P) + 1) = (uint8_t)(V); } while(0) +#define FAST_STORE3_B(P,V) do { *(P) = (uint8_t)((V) >> 16); *((P) + 1) = (uint8_t)((V) >> 8); \ + *((P) + 2) = (uint8_t)(V); } while(0) +#define FAST_STORE4_B(P,V) do { *(P) = (uint8_t)((V) >> 24); *((P) + 1) = (uint8_t)((V) >> 16); \ + *((P) + 2) = (uint8_t)((V) >> 8); *((P) + 3) = (uint8_t)(V); } while(0) +#define FAST_STORE8_B(P,V) do { *(P) = (uint8_t)((V) >> 56); *((P) + 1) = (uint8_t)((V) >> 48); \ + *((P) + 2) = (uint8_t)((V) >> 40); *((P) + 3) = (uint8_t)((V) >> 32); *((P) + 4) = (uint8_t)((V) >> 24); \ + *((P) + 5) = (uint8_t)((V) >> 16); *((P) + 6) = (uint8_t)((V) >> 8); *((P) + 7) = (uint8_t)(V); } while(0) +#define FAST_LOAD2_B(P, V) do { uint16_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 1); *(ptemp + 1) = *(P); (V) = vtemp; } while(0) +#define FAST_LOAD3_B(P, V) do { uint32_t vtemp = 0; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 2); *(ptemp + 1) = *((P) + 1); *(ptemp + 2) = *(P); \ + (V) = vtemp; } while(0) +#define FAST_LOAD4_B(P, V) do { uint32_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 3); *(ptemp + 1) = *((P) + 2); *(ptemp + 2) = *((P) + 1); *(ptemp + 3) = *(P); \ + (V) = vtemp; } while(0) +#define FAST_LOAD8_B(P, V) do { uint64_t vtemp; uint8_t* ptemp = (uint8_t*)&vtemp; \ + *(ptemp) = *((P) + 7); *(ptemp + 1) = *((P) + 6); *(ptemp + 2) = *((P) + 5); *(ptemp + 3) = *((P) + 4); \ + *(ptemp + 4) = *((P) + 3); *(ptemp + 5) = *((P) + 2); *(ptemp + 6) = *((P) + 1); *(ptemp + 7) = *(P); \ + (V) = vtemp; } while(0) +#endif + +// Macros to encode Sparrow data. 
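+// For example (illustrative value): with SPARROW_LITTLE_ENDIAN, FAST_STORE4 writes the
+// 32-bit value 0x11223344 as the byte sequence 44 33 22 11; the *_B variants would write
+// 11 22 33 44.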
+ +#ifdef SPARROW_LITTLE_ENDIAN +#define SLOW_STORE2(V) SLOW_STORE2_L(V) +#define SLOW_STORE4(V) SLOW_STORE4_L(V) +#define SLOW_STORE8(V) SLOW_STORE8_L(V) +#define SLOW_LOAD2(V) SLOW_LOAD2_L(V) +#define SLOW_LOAD4(V) SLOW_LOAD4_L(V) +#define SLOW_LOAD8(V) SLOW_LOAD8_L(V) +#define FAST_STORE2(P,V) FAST_STORE2_L(P,V) +#define FAST_STORE4(P,V) FAST_STORE4_L(P,V) +#define FAST_STORE8(P,V) FAST_STORE8_L(P,V) +#define FAST_LOAD2(P, V) FAST_LOAD2_L(P, V) +#define FAST_LOAD4(P, V) FAST_LOAD4_L(P, V) +#define FAST_LOAD8(P, V) FAST_LOAD8_L(P, V) +#else +#define SLOW_STORE2(V) SLOW_STORE2_B(V) +#define SLOW_STORE4(V) SLOW_STORE4_B(V) +#define SLOW_STORE8(V) SLOW_STORE8_B(V) +#define SLOW_LOAD2(V) SLOW_LOAD2_B(V) +#define SLOW_LOAD4(V) SLOW_LOAD4_B(V) +#define SLOW_LOAD8(V) SLOW_LOAD8_B(V) +#define FAST_STORE2(P,V) FAST_STORE2_B(P,V) +#define FAST_STORE4(P,V) FAST_STORE4_B(P,V) +#define FAST_STORE8(P,V) FAST_STORE8_B(P,V) +#define FAST_LOAD2(P, V) FAST_LOAD2_B(P, V) +#define FAST_LOAD4(P, V) FAST_LOAD4_B(P, V) +#define FAST_LOAD8(P, V) FAST_LOAD8_B(P, V) +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ByteBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Abstract class to handle buffer overflows. +class ByteBufferOverflow { +public: + + virtual ~ByteBufferOverflow() { + } + virtual void overflow() _THROW_(SparrowException) = 0; + virtual bool end() const = 0; +}; + +class ByteBuffer { +protected: + + uint8_t* data_; + uint64_t limit_; + uint64_t pos_; + ByteBufferOverflow* overflow_; + uint32_t version_; + +#ifdef _WIN32 + static volatile bool lockError_; + static bool canLock_; +#endif + +private: + + void overflow() _THROW_(SparrowException); + uint8_t get(); + void put(const uint8_t v); + +public: + + ByteBuffer(const uint8_t* data, const uint64_t limit); + ByteBuffer(uint8_t* data, const uint64_t limit, ByteBufferOverflow* overflow = 0, const uint32_t version = UINT_MAX); + ByteBuffer(const ByteBuffer& buffer, ByteBufferOverflow* overflow); + + uint8_t* getData(); + uint8_t* getCurrentData(); + const uint8_t* getData() const; + const uint8_t* getCurrentData() const; + void position(const uint64_t pos); + uint64_t position() const; + uint64_t limit() const; + void limit(const uint64_t newLimit); + bool end() const; + void advance(uint64_t offset); + uint32_t getVersion() const; + void setVersion(const uint32_t version); + + ByteBuffer& operator << (const uint8_t v); + ByteBuffer& operator << (const int8_t v); + ByteBuffer& operator << (const uint16_t v); + ByteBuffer& operator << (const int16_t v); + ByteBuffer& operator << (const uint32_t v); + ByteBuffer& operator << (const int32_t v); + ByteBuffer& operator << (const uint64_t v); + ByteBuffer& operator << (const int64_t v); + ByteBuffer& operator << (const double v); + ByteBuffer& operator << (const bool v); + ByteBuffer& operator << (const ByteBuffer& v); + ByteBuffer& operator << (const char* v); + + ByteBuffer& operator >> (uint8_t& v); + ByteBuffer& operator >> (int8_t& v); + ByteBuffer& operator >> (uint16_t& v); + ByteBuffer& operator >> (int16_t& v); + ByteBuffer& operator >> (uint32_t& v); + ByteBuffer& operator >> (int32_t& v); + ByteBuffer& operator >> (uint64_t& v); + ByteBuffer& operator >> (int64_t& v); + ByteBuffer& operator >> (double& v); + ByteBuffer& operator >> (bool& v); + ByteBuffer& operator >> (ByteBuffer& v); + + static void initialize(); + static uint8_t* 
mmap(const uint32_t size); + static bool munmap(uint8_t* buffer, const uint32_t size); +}; + +inline void ByteBuffer::overflow() _THROW_(SparrowException) { + if (overflow_ == 0) { + throw SparrowException::create(false, "Buffer overflow (limit=%llu bytes)", static_cast(limit_)); + } else { + overflow_->overflow(); + } +} + +inline uint8_t ByteBuffer::get() { + if (pos_ >= limit_) { + overflow(); + } + return data_[pos_++]; +} + +inline void ByteBuffer::put(const uint8_t v) { + if (pos_ >= limit_) { + overflow(); + } + data_[pos_++] = v; +} + +inline ByteBuffer::ByteBuffer(const uint8_t* data, const uint64_t limit) + : data_(const_cast(data)), limit_(limit), pos_(0), overflow_(0), version_(UINT_MAX) { +} + +inline ByteBuffer::ByteBuffer(uint8_t* data, const uint64_t limit, ByteBufferOverflow* overflow /* = 0 */, const uint32_t version /* = UINT_MAX */) + : data_(data), limit_(limit), pos_(0), overflow_(overflow), version_(version) { +} + +inline ByteBuffer::ByteBuffer(const ByteBuffer& buffer, ByteBufferOverflow* overflow) { + data_ = buffer.data_; + limit_ = buffer.limit_; + pos_ = buffer.pos_; + version_ = buffer.version_; + overflow_ = overflow; +} + +inline uint8_t* ByteBuffer::getData() { + return data_; +} + +inline uint8_t* ByteBuffer::getCurrentData() { + return data_ + pos_; +} + +inline const uint8_t* ByteBuffer::getData() const { + return data_; +} + +inline const uint8_t* ByteBuffer::getCurrentData() const { + return data_ + pos_; +} + +inline void ByteBuffer::position(const uint64_t pos) { + pos_ = pos; +} + +inline uint64_t ByteBuffer::position() const { + return pos_; +} + +inline uint64_t ByteBuffer::limit() const { + return limit_; +} + +inline void ByteBuffer::limit(const uint64_t newLimit) { + limit_ = newLimit; +} + +inline bool ByteBuffer::end() const { + return overflow_ == 0 ? 
(pos_ == limit_) : overflow_->end(); +} + +inline void ByteBuffer::advance(uint64_t offset) { + while (offset > 0) { + if (pos_ >= limit_) { + overflow(); + } + const uint64_t length = std::min(limit_ - pos_, offset); + pos_ += length; + offset -= length; + } +} + +inline uint32_t ByteBuffer::getVersion() const { + return version_; +} + +inline void ByteBuffer::setVersion(const uint32_t version) { + version_ = version; +} + +inline ByteBuffer& ByteBuffer::operator << (const uint8_t v) { + put(v); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int8_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint16_t v) { + const uint64_t npos = pos_ + 2; + if (npos > limit_) { + SLOW_STORE2(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE2(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int16_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint32_t v) { + const uint64_t npos = pos_ + 4; + if (npos > limit_) { + SLOW_STORE4(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE4(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int32_t v) { + return *this << static_cast(v); +} + +inline ByteBuffer& ByteBuffer::operator << (const uint64_t v) { + const uint64_t npos = pos_ + 8; + if (npos > limit_) { + SLOW_STORE8(v); + } else { + uint8_t* p = data_ + pos_; + FAST_STORE8(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const int64_t v) { + return *this << static_cast(v); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +inline ByteBuffer& ByteBuffer::operator << (const double v) { + return *this << *reinterpret_cast(&v); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +inline ByteBuffer& ByteBuffer::operator << (const bool v) { + return *this << static_cast(v ? 
1 : 0); +} + +inline ByteBuffer& ByteBuffer::operator << (const ByteBuffer& v) { + const uint64_t limit = v.limit_; + uint64_t pos = v.pos_; + while (pos < limit) { + if (pos_ >= limit_) { + overflow(); + } + const uint64_t length = std::min(limit_ - pos_, limit - pos); + memcpy(data_ + pos_, v.data_ + pos, length); + pos_ += length; + pos += length; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator << (const char* v) { + return *this << ByteBuffer(reinterpret_cast(v), static_cast(strlen(v))); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint8_t& v) { + v = get(); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int8_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint16_t& v) { + const uint64_t npos = pos_ + 2; + if (npos > limit_) { + SLOW_LOAD2(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD2(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int16_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint32_t& v) { + const uint64_t npos = pos_ + 4; + if (npos > limit_) { + SLOW_LOAD4(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD4(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int32_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (uint64_t& v) { + const uint64_t npos = pos_ + 8; + if (npos > limit_) { + SLOW_LOAD8(v); + } else { + uint8_t* p = data_ + pos_; + FAST_LOAD8(p, v); + pos_ = npos; + } + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (int64_t& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (double& v) { + return *this >> *(reinterpret_cast(&v)); +} + +inline ByteBuffer& ByteBuffer::operator >> (bool& v) { + uint8_t b; + *this >> b; + v = (b != 0); + return *this; +} + +inline ByteBuffer& ByteBuffer::operator >> (ByteBuffer& v) { + while (v.pos_ < v.limit_) { + if (pos_ >= limit_) { + overflow(); + } + const uint64_t length = std::min(limit_ - pos_, v.limit_ - v.pos_); + memcpy(v.data_ + v.pos_, data_ + pos_, length); + pos_ += length; + v.pos_ += length; + } + return *this; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, Interval& interval) { + T low; + bool lowerSet; + buffer >> lowerSet; + if (lowerSet) { + buffer >> low; + } + T up; + bool upperSet; + buffer >> upperSet; + if (upperSet) { + buffer >> up; + } + bool lowerIncluded, upperIncluded; + buffer >> lowerIncluded >> upperIncluded; + interval = Interval(lowerSet ? &low : 0, upperSet ? 
&up : 0, lowerIncluded, upperIncluded); + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const Interval& interval) { + const T* low = interval.getLow(); + buffer << (low != 0); + if (low != 0) { + buffer << *low; + } + const T* up = interval.getUp(); + buffer << (up != 0); + if (up != 0) { + buffer << *up; + } + buffer << interval.isLowerIncluded() << interval.isUpperIncluded(); + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSarray& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << v[i]; + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSarray& v) { + uint32_t length; + buffer >> length; + v = SYSarray(length); + for (uint32_t i = 0; i < length; ++i) { + buffer >> v[i]; + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSvector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << v[i]; + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSvector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T t; + buffer >> t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpVector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << *(v[i]); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpVector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T* t = new T(); + buffer >> *t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpSortedVector& v) { + buffer << static_cast(v.length()); + for (uint32_t i = 0; i < v.length(); ++i) { + buffer << *(v[i]); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpSortedVector& v) { + uint32_t length; + buffer >> length; + v.resize(length); + for (uint32_t i = 0; i < length; ++i) { + T* t = new T(); + buffer >> *t; + v.append(t); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYShash& h) { + buffer << h.entries(); + SYShashIterator iterator(h); + while (++iterator) { + buffer << iterator.key(); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYShash& h) { + uint32_t entries; + buffer >> entries; + for (uint32_t i = 0; i < entries; ++i) { + T entry; + buffer >> entry; + h.insert(entry); + } + return buffer; +} + +template inline ByteBuffer& operator << (ByteBuffer& buffer, const SYSpHash& h) { + buffer << h.entries(); + SYSpHashIterator iterator(h); + while (++iterator) { + buffer << *iterator.key(); + } + return buffer; +} + +template inline ByteBuffer& operator >> (ByteBuffer& buffer, SYSpHash& h) { + uint32_t entries; + buffer >> entries; + for (uint32_t i = 0; i < entries; ++i) { + T* entry = new T(); + buffer >> *entry; + h.insert(entry); + } + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PrintBuffer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PrintBuffer : public ByteBuffer, public ByteBufferOverflow { +public: + + PrintBuffer(); + + virtual ~PrintBuffer() { + 
delete [] data_; + } + + void overflow() override _THROW_(SparrowException) { + uint8_t* save = data_; + data_ = new uint8_t[limit_ * 2]; + memcpy(data_, save, limit_); + limit_ *= 2; + delete [] save; + } + + bool end() const override { + return false; + } +}; + +PrintBuffer& operator << (PrintBuffer& buffer, const char* v); +class Str; +PrintBuffer& operator << (PrintBuffer& buffer, const Str& v); + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Connection; +class SocketReader : public ByteBuffer, public ByteBufferOverflow { +private: + + Connection& connection_; + const uint32_t bytesToRead_; + int bytesRead_; + +public: + + SocketReader(Connection& connection, ByteBuffer& buffer) _THROW_(SparrowException); + + virtual ~SocketReader() { + } + + void overflow() override _THROW_(SparrowException); + + bool end() const override { + return bytesRead_ == static_cast(bytesToRead_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketWriter : public ByteBuffer, public ByteBufferOverflow { +private: + + Connection& connection_; + +public: + + SocketWriter(Connection& connection); + + virtual ~SocketWriter(); + + void overflow() override _THROW_(SparrowException) { + flush(); + position(0); + } + + void flush() _THROW_(SparrowException); + + bool end() const override { + return false; // No EOF when writing. + } +}; + +} + +#endif /* #ifndef _engine_serial_h_ */ diff --git a/storage/sparrow/engine/socketutil.cc b/storage/sparrow/engine/socketutil.cc new file mode 100644 index 000000000000..4444827ec852 --- /dev/null +++ b/storage/sparrow/engine/socketutil.cc @@ -0,0 +1,203 @@ +/* + Socket utilities. +*/ + +#include "socketutil.h" +#include "../functions/ipaddress.h" + +#ifndef _WIN32 +#include +#endif + +namespace Sparrow { + +using namespace IvFunctions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketUtil +////////////////////////////////////////////////////////////////////////////////////////////////////// + +my_socket SocketUtil::stopSocket_ = INVALID_SOCKET; +SocketAddress SocketUtil::stopSocketAddress_; +bool SocketUtil::v6_ = false; + +// STATIC +void SocketUtil::initialize() _THROW_(SparrowException) { +#ifdef _WIN32 + // Initialize Winsock 2.2. + WSADATA wsaData; + WSAStartup(MAKEWORD(2, 2), &wsaData); +#endif + + // Checks whether IPv6 is supported by trying to create a dummy IPv6 socket. + my_socket socketId = socket(AF_INET6, SOCK_STREAM, IPPROTO_IPV6); + if (socketId != INVALID_SOCKET) { + closesocket(socketId); + v6_ = true; + } + + // Create the stop socket. + try { + stopSocket_ = SocketUtil::create(SOCK_DGRAM, SocketUtil::getAddress("127.0.0.1", 0)); + stopSocketAddress_ = SocketUtil::getAddress(stopSocket_); + } catch(const SparrowException&) { + // Ignore error: without stop socket, shutdown will be slower - not a big deal. + } +} + +// Creates and binds a socket of the given type to the given address. +// STATIC +my_socket SocketUtil::create(int type, const SocketAddress& socketAddress) _THROW_(SparrowException) { + // Create socket. + my_socket socketId = socket(socketAddress.isV6() ? 
AF_INET6 : AF_INET, type, 0); + if (socketId == INVALID_SOCKET) { + throw SparrowException::create(true, "Cannot create socket"); + } + + // Bind socket. + int dummy = 1; + setsockopt(socketId, SOL_SOCKET, SO_REUSEADDR, (char*)&dummy, sizeof(dummy)); + if (bind(socketId, socketAddress.getSockAddr(), socketAddress.getSockAddrLength()) != 0) { + SparrowException e = SparrowException::create(true, "Cannot bind socket to address %s", socketAddress.print().c_str()); + closesocket(socketId); + throw e; + } + return socketId; +} + +// Gets binding address from its textual representation. +// STATIC +SocketAddress SocketUtil::getAddress(const char* address, uint32_t port) _THROW_(SparrowException) { + uint8_t buffer[16] = {0}; + IpAddress ipAddress(buffer, sizeof(buffer)); + const bool isIpAddress = address == 0 ? false : ipAddress.parse(address, static_cast(strlen(address))); + if (v6_) { + struct sockaddr_in6 socketAddress; + memset(&socketAddress, 0, sizeof(socketAddress)); + socketAddress.sin6_family = AF_INET6; + socketAddress.sin6_port = htons(static_cast(port)); + if (address == 0 || strlen(address) == 0) { + // No address. + socketAddress.sin6_addr = in6addr_any; + } else if (isIpAddress) { + // Address given. +#ifdef _WIN32 + int s = sizeof(socketAddress); + const bool ok = WSAStringToAddress(const_cast(address), AF_INET6, 0, reinterpret_cast(&socketAddress), &s) == 0; +#else + const bool ok = inet_pton(AF_INET6, address, &socketAddress.sin6_addr) == 1; +#endif + if (!ok) { + throw SparrowException::create(true, "Cannot convert address \"%s\" to IPv6 internal format", address); + } + } else { + // Address given as a name: try DNS resolution. + char buffer[16]; + snprintf(buffer, sizeof(buffer), "%u", port); + struct addrinfo* result; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET6; + const int code = getaddrinfo(address, buffer, &hints, &result); + if (code != 0) { + throw SparrowException::create(false, "Cannot get address info for \"%s\", port %u (%s)", + address, port, gai_strerror(code)); + } + memcpy(&socketAddress, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + } + return SocketAddress(socketAddress); + } else { + struct sockaddr_in socketAddress; + memset(&socketAddress, 0, sizeof(socketAddress)); + socketAddress.sin_family = AF_INET; + socketAddress.sin_port = htons(static_cast(port)); + if (address == 0 || strlen(address) == 0) { + // No address. + socketAddress.sin_addr.s_addr = htonl(INADDR_ANY); + } else if (isIpAddress) { + // Address given. + ulong addr = inet_addr(address); + if (addr == static_cast(INADDR_NONE)) { + addr = htonl(INADDR_ANY); + } + socketAddress.sin_addr.s_addr = addr; + } else { + // Address given as a name: try DNS resolution. + char buffer[16]; + snprintf(buffer, sizeof(buffer), "%u", port); + struct addrinfo* result = 0; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + const int code = getaddrinfo(address, buffer, &hints, &result); + if (code != 0) { + throw SparrowException::create(false, "Cannot get address info for \"%s\", port %u (%s)", + address, port, gai_strerror(code)); + } + memcpy(&socketAddress, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + } + return SocketAddress(socketAddress); + } +} + +// Gets the address the given socket is bound to. 
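For reference, the two helpers above compose as follows when opening a listening socket. This is an illustrative sketch, not code from the patch; the helper name and port are made up, and the error handling mirrors create() above.

  // Hypothetical helper - illustration only.
  static my_socket openListener(uint32_t port) _THROW_(SparrowException) {
    SocketAddress addr = SocketUtil::getAddress(nullptr, port);  // any interface, IPv6 or IPv4
    my_socket s = SocketUtil::create(SOCK_STREAM, addr);         // socket() + SO_REUSEADDR + bind()
    if (listen(s, SOMAXCONN) != 0) {
      closesocket(s);
      throw SparrowException::create(true, "Cannot listen on port %u", port);
    }
    return s;
  }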
+// STATIC +SocketAddress SocketUtil::getAddress(my_socket socket) _THROW_(SparrowException) { + struct sockaddr_in6 socketAddress; + socklen_t length = sizeof(socketAddress); + if (getsockname(socket, reinterpret_cast(&socketAddress), &length) != 0) { + throw SparrowException::create(true, "Cannot get bind address of socket"); + } + if (length == sizeof(socketAddress)) { + return SocketAddress(socketAddress); + } else { + return SocketAddress(*reinterpret_cast(&socketAddress)); + } +} + +// STATIC +bool SocketUtil::notifyStopSocket() { + if (stopSocket_ == INVALID_SOCKET) { + return false; + } + const char* message = "stop"; + int sent = sendto(stopSocket_, message, static_cast(strlen(message)), 0, + stopSocketAddress_.getSockAddr(), stopSocketAddress_.getSockAddrLength()); + if (sent < 0) { + try { + throw SparrowException::create(true, "Cannot notify stop socket"); + } catch(const SparrowException& e) { + e.toLog(); + } + } + return true; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +using namespace IvFunctions; + +// Prints an IP address and port number. +Str SocketAddress::print() const { + char buffer[128]; + char* s = buffer; + unsigned int port; + if (v6_) { + port = static_cast(ntohs(raw_.v6_.sin6_port)); + *s++ = '['; + s += IpAddress(reinterpret_cast(&raw_.v6_.sin6_addr), 16).print(s); + *s++ = ']'; + } else { + port = static_cast(ntohs(raw_.v4_.sin_port)); + s += IpAddress(reinterpret_cast(&raw_.v4_.sin_addr), 4).print(s); + } + sprintf(s, ":%u", port); + return Str(buffer, static_cast(strlen(buffer))); +} + +} + diff --git a/storage/sparrow/engine/socketutil.h b/storage/sparrow/engine/socketutil.h new file mode 100644 index 000000000000..cec630897224 --- /dev/null +++ b/storage/sparrow/engine/socketutil.h @@ -0,0 +1,131 @@ +/* + Socket utilities. +*/ + +#ifndef _engine_socketutil_h_ +#define _engine_socketutil_h_ + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#include +#endif + +#include "exception.h" +#include "misc.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Address to which a socket is bound. Can be used as a key. +class SocketAddress { +private: + + union RawSocketAddress { + struct sockaddr_in v4_; + struct sockaddr_in6 v6_; + } raw_; + bool v6_; + +public: + + SocketAddress() : v6_(false) { + memset(&raw_, 0, sizeof(raw_)); + } + + SocketAddress(struct sockaddr_in& v4) : v6_(false) { + memset(&raw_, 0, sizeof(raw_)); + raw_.v4_ = v4; + } + + SocketAddress(struct sockaddr_in6& v6) : v6_(true) { + memset(&raw_, 0, sizeof(raw_)); + raw_.v6_ = v6; + } + + bool isV6() const { + return v6_; + } + + const struct sockaddr_in& getV4() const { + return raw_.v4_; + } + + struct sockaddr_in& getV4() { + return raw_.v4_; + } + + const struct sockaddr_in6& getV6() const { + return raw_.v6_; + } + + struct sockaddr_in6& getV6() { + return raw_.v6_; + } + + const struct sockaddr* getSockAddr() const { + return isV6() ? (const struct sockaddr*)(&getV6()) : (const struct sockaddr*)(&getV4()); + } + + struct sockaddr* getSockAddr() { + return isV6() ? 
(struct sockaddr*)(&getV6()) : (struct sockaddr*)(&getV4()); + } + + int getSockAddrLength() const { + return static_cast(isV6() ? sizeof(getV6()) : sizeof(getV4())); + } + + bool operator == (const SocketAddress& right) const { + return v6_ == right.v6_ && memcmp(&raw_, &right.raw_, sizeof(raw_)) == 0; + } + + uint32_t hash() const { + uint32_t h = 1; + uint32_t i = sizeof(raw_); + const uint8_t* raw = (const uint8_t*)&raw_; + while (i-- > 0) { + h = 31 * h + raw[i]; + } + return h; + } + + Str print() const; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SocketUtil +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SocketUtil { +private: + + static my_socket stopSocket_; + static SocketAddress stopSocketAddress_; + static bool v6_; + +public: + + static void initialize() _THROW_(SparrowException); + + static my_socket create(int type, const SocketAddress& address) _THROW_(SparrowException); + + static SocketAddress getAddress(const char* address, uint32_t port) _THROW_(SparrowException); + + static SocketAddress getAddress(my_socket socket) _THROW_(SparrowException); + + static my_socket getStopSocket() { + return stopSocket_; + } + + static bool notifyStopSocket(); +}; + +} + +#endif /* #ifndef _engine_socketutil_h_ */ diff --git a/storage/sparrow/engine/sort.cc b/storage/sparrow/engine/sort.cc new file mode 100644 index 000000000000..5294daffb691 --- /dev/null +++ b/storage/sparrow/engine/sort.cc @@ -0,0 +1,48 @@ +/* + Sort helpers. +*/ + +#include "sort.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SortTest +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +void SortTest::run() { + uint32_t n = 10; + for (;;) { + uint32_t max = n * 10; + IndirectorTest indirector; + SYSxvector data; + for (uint32_t i = 0; i < n; ++i) { + indirector.append(i); + const uint32_t v = static_cast(max * static_cast(rand()) / RAND_MAX); + data.append(v); + } + const ComparatorTest comparator(data); + uint64_t t = my_micro_time(); + Sort::quickSort(indirector, comparator, 0, n); + spw_print_information("Quicksort: %u integers in %s", n, Str::fromDuration((my_micro_time() - t) / 1000).c_str()); + uint32_t previous = 0; + for (uint32_t i = 0; i < n; ++i) { + const uint32_t v = data[indirector[i]]; + if (i > 0) { + if (v < previous) { + spw_print_error("Data not sorted!!"); + break; + } + } + previous = v; + } + if (n == 100000000) { + break; + } else { + n *= 10; + } + } +} + +} diff --git a/storage/sparrow/engine/sort.h b/storage/sparrow/engine/sort.h new file mode 100644 index 000000000000..848cd774575c --- /dev/null +++ b/storage/sparrow/engine/sort.h @@ -0,0 +1,220 @@ +/* + Sort helpers. 
+*/ + +#ifndef _engine_sort_h_ +#define _engine_sort_h_ + +#include "search.h" +#include "thread.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sort +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class Sort { +private: + + static int med3(const I& indirector, const C& comparator, const int a, const int b, const int c); + + static void swap(I& indirector, const int a, const int b); + + static void vecswap(I& indirector, int a, int b, const int n); + +public: + + static void quickSort(I& indirector, const C& comparator, int off, int len); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sort +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Stacked ranges to avoid recursion. +class SortRange { +public: + int off_; + int len_; + + SortRange() : off_(0), len_(0) { + } + + SortRange(const int off, const int len) : off_(off), len_(len) { + } +}; + +// STATIC +template inline void Sort::swap(I& indirector, const int a, const int b) { + if (a != b) { + uint32_t* va = &indirector[a]; + uint32_t* vb = &indirector[b]; + const uint32_t t = *va; + *va = *vb; + *vb = t; + } +} + +// STATIC +template inline void Sort::vecswap(I& indirector, int a, int b, const int n) { + for (int i = 0; i < n; ++i, ++a, ++b) { + swap(indirector, a, b); + } +} + +// Returns the index of the median of the three indexed integers. +// STATIC +template inline int Sort::med3(const I& indirector, const C& comparator, const int a, const int b, const int c) { + if (comparator.compare(indirector[a], indirector[b]) < 0) { + if (comparator.compare(indirector[b], indirector[c]) < 0) { + return b; + } else if (comparator.compare(indirector[a], indirector[c]) < 0) { + return c; + } else { + return a; + } + } else { + if (comparator.compare(indirector[b], indirector[c]) > 0) { + return b; + } else if (comparator.compare(indirector[a], indirector[c]) > 0) { + return c; + } else { + return a; + } + } +} + +// Quick sort (same code as in JDK, but not recursive). +// STATIC +template void Sort::quickSort(I& indirector, const C& comparator, int off, int len) { + SortRange stack[32]; + int top = 0; + bool recurse = true; + while (recurse) { + recurse = false; + + // Insertion sort on smallest arrays. + if (len < 7) { + const int hi = off + len - 1; + for (int i = off; i <= hi; ++i) { + for (int j = i; j > off && comparator.compare(indirector[j - 1], indirector[j]) > 0; --j) { + swap(indirector, j, j - 1); + } + } + } else { + // Choose a partition element, v. + int m = off + (len >> 1); // Small arrays, middle element. + if (len > 7) { + int l = off; + int n = off + len - 1; + if (len > 40) { // Big arrays, pseudomedian of 9. + const int s = len / 8; + l = Sort::med3(indirector, comparator, l, l + s, l + 2 * s); + m = Sort::med3(indirector, comparator, m - s, m, m + s); + n = Sort::med3(indirector, comparator, n - 2 * s, n - s, n); + } + m = Sort::med3(indirector, comparator, l, m, n); // Mid-size, med of 3. + } + const int v = indirector[m]; + + // Establish Invariant: v* (v)* v*. 
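+      // The loop below maintains the classic Bentley-McIlroy three-way
+      // partition (the same scheme as the JDK sort this code is taken from):
+      //
+      //     [ == v | < v | not yet examined | > v | == v ]
+      //      off..a-1  a..b-1      b..c      c+1..d  d+1..off+len-1
+      //
+      // Keys equal to the pivot v are parked at both ends and moved back to
+      // the middle by the two vecswap() calls once b and c cross.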
+ int a = off, b = a, c = off + len - 1, d = c; + while (true) { + while (b <= c) { + const int cmp = comparator.compare(indirector[b], v); + if (cmp > 0) { + break; + } else if (cmp == 0) { + swap(indirector, a++, b); + } + b++; + } + while (c >= b) { + const int cmp = comparator.compare(indirector[c], v); + if (cmp < 0) { + break; + } else if (cmp == 0) { + swap(indirector, c, d--); + } + c--; + } + if (b > c) { + break; + } + swap(indirector, b++, c--); + } + + // Swap partition elements back to middle. + int s; + const int n = off + len; + s = std::min(a - off, b - a); + vecswap(indirector, off, b - s, s); + s = std::min(d - c, n - d - 1); + vecswap(indirector, b, n - s, s); + + const int s1 = b - a; + const int s2 = d - c; + if (s1 > s2) { + if (s1 > 1) { + stack[top++] = SortRange(off, s1); + } + if (s2 > 1) { + off = n - s2; + len = s2; + recurse = true; + } + } else { + if (s2 > 1) { + stack[top++] = SortRange(n - s2, s2); + } + if (s1 > 1) { + len = s1; + recurse = true; + } + } + } + if (!recurse) { + if (--top >= 0) { + const SortRange& r = stack[top]; + off = r.off_; + len = r.len_; + recurse = true; + } + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SortTest +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SortTest { +public: + + static void run(); +}; + +typedef SYSxvector IndirectorTest; + +class ComparatorTest { +private: + + const SYSxvector& data_; + +public: + + ComparatorTest(const SYSxvector& data) : data_(data) { + } + + int compare(const uint32_t row1, const uint32_t row2) const { + const uint32_t v1 = data_[row1]; + const uint32_t v2 = data_[row2]; + return v1 < v2 ? -1 : (v1 > v2 ? 1 : 0); + } + +}; + +} + +#endif /* #ifndef _engine_sort_h_ */ diff --git a/storage/sparrow/engine/thread.cc b/storage/sparrow/engine/thread.cc new file mode 100644 index 000000000000..1e3a2e35598f --- /dev/null +++ b/storage/sparrow/engine/thread.cc @@ -0,0 +1,172 @@ +/* + Thread base classes. +*/ + +#include "sql/sql_class.h" +#include "sql/protocol_classic.h" +#include "sql/sql_lex.h" + +#include "thread.h" +#include "../handler/plugin.h" // For configuration parameters. +#include "io.h" // For IOContext. + +// Utility function to workaround C linkage issue with pthread_create. 
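+// The start routine handed to my_thread_create()/pthread_create() must have C
+// language linkage, so this extern "C" wrapper is the single spawn point for
+// Sparrow threads; it forwards to mysql_thread_create(), which also registers
+// the new thread with the performance schema under the supplied key.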
+extern "C" int createSparrowThread(PSI_thread_key key, my_thread_handle* thread, const my_thread_attr_t* attr, my_start_routine func, void* arg) { + return mysql_thread_create(key, thread, attr, reinterpret_cast(func), arg); +} + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Thread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +Lock Thread::lock_(true, "Thread::lock_"); + +// STATIC +void Thread::initTHD(THD*& thd, void* stackStart) { + my_thread_init(); + thd = new THD(); + THD_CHECK_SENTRY(thd); + thd->thread_stack = (char*)stackStart; + thd->store_globals(); + //thd->init_for_queries(); + + // Initialize security context + thd->m_main_security_ctx.set_host_or_ip_ptr(); + thd->security_context()->set_master_access(ALL_ACCESS); + //thd->main_security_ctx.master_access = ~0; + //thd->main_security_ctx.priv_user[0] = 0; + thd->security_context()->skip_grants(); + + thd->get_protocol_classic()->set_client_capabilities(0); + //thd->client_capabilities = 0; + + thd->get_protocol_classic()->init_net(nullptr); + + CHARSET_INFO* charset_connection = get_charset_by_csname("utf8mb4", MY_CS_PRIMARY, MYF(MY_WME)); + thd->variables.character_set_client = charset_connection; + thd->variables.character_set_results = charset_connection; + thd->variables.collation_connection = charset_connection; + thd->update_charset(); + thd->set_new_thread_id(); + + lex_start(thd); + + // This call should not be usefull since class MessageThread and class Thread create the threads with attribute MY_THREAD_CREATE_DETACHED + //pthread_detach_this_thread(); + thd->real_id = my_thread_self(); +} + +// STATIC +void Thread::deleteThreadSpecific(THD*& thd) { + IOContext::destroy(); + if (thd != 0) { + // No need to delete explicitly items in THD. THD is now a class with a destructor. + //net_end(&thd->net); + //thd->release_resources(); + delete thd; + thd = nullptr; + } + my_thread_end(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Worker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory Worker::factory_("Worker"); +JobThreadPool* Worker::threadPool_ = 0; + +// STATIC +void Worker::initialize() _THROW_(SparrowException) { + // The queue is not bulk because we want jobs to be distributed across workers. + threadPool_ = new JobThreadPool(Worker::factory_, &sparrow_max_worker_threads, + &SparrowStatus::get().workerThreads_, "Worker::Queue", false); +} + +// STATIC +void Worker::shutdown() { + threadPool_->stop(); +} + +// STATIC +void Worker::sendJob(Job* job) { + threadPool_->send(job); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Flush +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory Flush::factory_("Flush"); +JobThreadPool* Flush::threadPool_ = 0; + +// STATIC +void Flush::initialize() _THROW_(SparrowException) { + // The queue is not bulk because we want jobs to be distributed across flush threads. 
+ threadPool_ = new JobThreadPool(Flush::factory_, &sparrow_max_flush_threads, + &SparrowStatus::get().flushThreads_, "Flush::Queue", false); +} + +// STATIC +void Flush::shutdown() { + threadPool_->stop(); +} + +// STATIC +void Flush::sendJob(Job* job) { + threadPool_->send(job); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Writer +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory Writer::factory_("Writer"); +JobThreadPool* Writer::threadPool_ = 0; + +// STATIC +void Writer::initialize() _THROW_(SparrowException) { + // The queue is not bulk because we want jobs to be distributed across writers. + threadPool_ = new JobThreadPool(Writer::factory_, &sparrow_max_writer_threads, + &SparrowStatus::get().writerThreads_, "Writer::Queue", false); +} + +// STATIC +void Writer::shutdown() { + threadPool_->stop(); +} + +// STATIC +void Writer::sendJob(Job* job) { + threadPool_->send(job); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ApiWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const JobThreadFactory ApiWorker::factory_("ApiWorker"); +JobThreadPool* ApiWorker::threadPool_ = 0; + +// STATIC +void ApiWorker::initialize() _THROW_(SparrowException) { + // The queue is not bulk because we want jobs to be distributed across API workers. + threadPool_ = new JobThreadPool(ApiWorker::factory_, &sparrow_max_api_worker_threads, + &SparrowStatus::get().apiWorkerThreads_, "ApiWorker::Queue", false); +} + +// STATIC +void ApiWorker::shutdown() { + threadPool_->stop(); +} + +// STATIC +void ApiWorker::sendJob(Job* job) { + threadPool_->send(job); +} + +} + diff --git a/storage/sparrow/engine/thread.h b/storage/sparrow/engine/thread.h new file mode 100644 index 000000000000..4984787093c9 --- /dev/null +++ b/storage/sparrow/engine/thread.h @@ -0,0 +1,555 @@ +/* + Thread base classes. +*/ + +#ifndef _engine_thread_h_ +#define _engine_thread_h_ + +#include "queue.h" +#include "exception.h" +#include "cond.h" +#include "vec.h" +#include "misc.h" +#include "mysql/psi/mysql_thread.h" + +//typedef void*(*ThreadFunction)(void*); +//extern "C" int createSparrowThread(PSI_thread_key key, pthread_t* thread, pthread_attr_t* attr, ThreadFunction func, void* arg); +extern "C" int createSparrowThread(PSI_thread_key key, my_thread_handle* thread, const my_thread_attr_t* attr, my_start_routine func, void* arg); + +extern uint sparrow_idle_thread_timeout; + +class THD; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Thread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Thread { +public: + + // Lock used by all start/stop condition variables. + static Lock lock_; + +private: + + PSI_thread_key key_; + PSI_thread_info info_; + my_thread_handle thread_; + volatile bool stop_; + volatile bool stopped_; + Cond startCond_; + Cond stopCond_; + +public: + + Thread(const char* name) : stop_(false), stopped_(false), startCond_(false, lock_, (Str(name) + Str("::startCond_")).c_str()), + stopCond_(false, lock_, (Str(name) + Str("::stopCond_")).c_str()) { + info_.m_key = &key_; + const size_t l = strlen(name); + const char* os_name = name; + os_name += (l >= PFS_MAX_OS_NAME_LENGTH ? 
(l - PFS_MAX_OS_NAME_LENGTH + 1) : 0); + info_.m_os_name = my_strdup(PSI_INSTRUMENT_ME, os_name, MYF(MY_WME)); + + name += l > PFS_MAX_INFO_NAME_LENGTH ? (l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + Lock::lockPSI(); + mysql_thread_register("sparrow", &info_, 1); + Lock::unlockPSI(); + //} +#endif + } + + virtual ~Thread() { + if (info_.m_name != nullptr) { + my_free(const_cast(info_.m_name)); + info_.m_name = nullptr; + } + } + + bool start() { + my_thread_attr_t attr; + my_thread_attr_init(&attr); + my_thread_attr_setdetachstate(&attr, MY_THREAD_CREATE_DETACHED); + my_thread_attr_setstacksize(&attr, my_thread_stack_size); + Guard guard(lock_); + if (createSparrowThread(key_, &thread_, &attr, handler, static_cast(this)) != 0) { + return false; + } + startCond_.wait(true); + return true; + } + + void stop(const uint64_t timeout=0) { + { + Guard guard(lock_); + stop_ = true; + } + if ( notifyStop() ) { + Guard guard(lock_); + if (!stopped_) + stopCond_.wait(true); + } + } + + void join() { + my_thread_join(&thread_, nullptr); + } + + static void initTHD(THD*& thd, void* stackStart); + static void deleteThreadSpecific(THD*& thd); + +protected: + + virtual bool process() = 0; + + virtual bool notifyStop() = 0; + + virtual bool deleteAfterExit() = 0; + +private: + + static void* handler(void *p) { + THD* thd = 0; // Need to be first for THD thread stack. + Thread* thread = (Thread*)p; + initTHD(thd, &thd); + thread->startCond_.signal(); + while (!thread->stop_) { + if (!thread->process()) { + break; + } + } +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + PSI_THREAD_CALL(delete_current_thread)(); + //} +#endif + deleteThreadSpecific(thd); + if (thread->stop_) { + Guard guard(Thread::lock_); + thread->stopped_ = true; + thread->stopCond_.signal(true); + } else { + if (thread->deleteAfterExit()) { + delete thread; + } + } + my_thread_exit(nullptr); + return nullptr; + + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MessageGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class MessageGuard { +private: + + SYSpSlist& messages_; + +public: + + MessageGuard(SYSpSlist& messages) : messages_(messages) { + } + + ~MessageGuard() { + messages_.clearAndDestroy(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MessageThread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class MessageThread { + friend class Queue; + +private: + + PSI_thread_key key_; + PSI_thread_info info_; + my_thread_handle thread_; + Queue* queue_; + Cond cond_; + const bool owned_; + volatile bool stopped_; + Cond startCond_; + Cond stopCond_; + uint* timeout_; + +private: + + void initialize(const char* name) { + info_.m_key = &key_; + const size_t l = strlen(name); + const char* os_name = name; + os_name += (l >= PFS_MAX_OS_NAME_LENGTH ? (l - PFS_MAX_OS_NAME_LENGTH + 1) : 0); + info_.m_os_name = my_strdup(PSI_INSTRUMENT_ME, os_name, MYF(MY_WME)); + + name += l > PFS_MAX_INFO_NAME_LENGTH ? 
(l - PFS_MAX_INFO_NAME_LENGTH) : 0; + info_.m_name = my_strdup(PSI_INSTRUMENT_ME, name, MYF(MY_WME)); + info_.m_flags = 0; + info_.m_volatility = PSI_VOLATILITY_UNKNOWN; + info_.m_documentation = PSI_DOCUMENT_ME; +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + Lock::lockPSI(); + mysql_thread_register("sparrow", &info_, 1); + Lock::unlockPSI(); + //} +#endif + } + + Cond& getCond() { + return cond_; + } + +public: + + MessageThread(const char* name, const bool bulk) + : queue_(new Queue(name, bulk)), cond_(false, *queue_, (Str(name) + Str("::cond_")).c_str()), owned_(true), stopped_(false), + startCond_(false, Thread::lock_, (Str(name) + Str("::startCond_")).c_str()), + stopCond_(false, Thread::lock_, (Str(name) + Str("::stopCond_")).c_str()), timeout_(0) { + initialize(name); + } + + MessageThread(const char* name, Queue& queue, uint* timeout) + : queue_(&queue), cond_(false, *queue_, (Str(name) + Str("::cond_")).c_str()), owned_(false), stopped_(false), + startCond_(false, Thread::lock_, (Str(name) + Str("::startCond_")).c_str()), + stopCond_(false, Thread::lock_, (Str(name) + Str("::stopCond_")).c_str()), timeout_(timeout) { + initialize(name); + } + + void send(M* message) { + queue_->send(message); + } + + bool start() { + my_thread_attr_t attr; + my_thread_attr_init(&attr); + my_thread_attr_setdetachstate(&attr, MY_THREAD_CREATE_DETACHED); + my_thread_attr_setstacksize(&attr, my_thread_stack_size); + Guard guard(Thread::lock_); + if (createSparrowThread(key_, &thread_, &attr, handler, static_cast(this)) != 0) { + return false; + } + startCond_.wait(true); + return true; + } + + void stop() { + Guard guard(Thread::lock_); + if (owned_) { + queue_->signal(); + } + while (!stopped_) { + stopCond_.wait(true); + } + } + + virtual ~MessageThread() { + my_free(const_cast(info_.m_name)); + if (owned_) { + delete queue_; + } + } + + bool operator == (const MessageThread& right) const { + return this == &right; + } + +protected: + + // If message is 0, this thread timed out. + // Return true to continue, or false to stop thread. + virtual bool process(SYSpSlist* messages) = 0; + +private: + + static void* handler(void *p) { + THD* thd = 0; // Need to be first for THD thread stack. + MessageThread* thread = (MessageThread*)p; + Thread::initTHD(thd, &thd); + thread->startCond_.signal(); + SYSpSlist messages; + bool timedOut = false; + while (true) { + MessageGuard guard(messages); + volatile uint* timeout = thread->timeout_; + const bool ok = thread->queue_->wait(thread, timeout == 0 ? 
0 : *timeout, messages); + if (thread->queue_->stopped_) { + break; + } + if (ok) { + if (!thread->process(&messages)) { + break; + } + } else if (*thread->timeout_ > 0) { + timedOut = true; + break; + } + } +#ifdef HAVE_PSI_INTERFACE + //if (PSI_server != 0) { + PSI_THREAD_CALL(delete_current_thread)(); + //} +#endif + Thread::deleteThreadSpecific(thd); + if (timedOut) { + thread->queue_->threadTimedOut(thread); + } else { + Guard guard(Thread::lock_); + thread->stopped_ = true; + thread->stopCond_.signal(true); + } + my_thread_exit(nullptr); + return nullptr; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// MessageThreadFactory +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class MessageThreadFactory { +public: + + virtual ~MessageThreadFactory() { + } + + virtual MessageThread* createThread(Queue& queue) const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ThreadPool +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class ThreadPool : public Queue { +private: + + const MessageThreadFactory& factory_; + volatile uint32_t* maxThreads_; // 0 for unlimited. + volatile uint32_t* threadCount_; + SYSpVector, 16> threads_; + +protected: + + void needMoreThreads(const uint32_t count) override { + for (uint32_t i = 0; i < count; ++i) { + const uint32_t length = threads_.length(); + if (*maxThreads_ != 0 && length >= *maxThreads_) { + break; + } else { + MessageThread* thread = factory_.createThread(*this); + if (thread->start()) { + threads_.append(thread); + (*threadCount_)++; + } else { + delete thread; + break; + } + } + } + } + + void threadTimedOut(MessageThread* thread) override { + { + Guard guard(*this); + threads_.remove(thread); + (*threadCount_)--; + } + delete thread; + } + +public: + + ThreadPool(const MessageThreadFactory& factory, volatile uint32_t* maxThreads, volatile uint32_t* threadCount, const char* name, const bool bulk) + : Queue(name, bulk), factory_(factory), maxThreads_(maxThreads), threadCount_(threadCount) { + } + + void stop() { + Queue::signal(); + while (true) { + MessageThread* thread; + { + Guard guard(*this); + if (threads_.isEmpty()) { + break; + } + thread = threads_.first(); + threads_.removeFirst(); + } + thread->stop(); + delete thread; + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Job +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Job { +public: + + virtual ~Job() { + } + + virtual void process() = 0; +}; + +typedef SYSpVector Jobs; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// JobThread +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class JobThread : public MessageThread { +protected: + + bool process(SYSpSlist* jobs) override { + SYSpSlistIterator iterator(*jobs); + while (++iterator) { + try { + iterator.key()->process(); + } catch(const SparrowException& e) { + e.toLog(); + } + } + return true; + } + +public: + + JobThread(const char* name, Queue& queue) : MessageThread(name, queue, &sparrow_idle_thread_timeout) { + } + + virtual ~JobThread() { + } +}; + 
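An illustrative, hypothetical Job implementation, to show how work is handed to the pools declared below; the class name and log text are made up. Ownership of the job passes to the pool: the worker thread destroys it once it has been processed (see MessageGuard above).

  class StatisticsJob : public Job {
  public:
    explicit StatisticsJob(const Str& table) : table_(table) {
    }

    void process() override {
      // A real job would do its I/O or CPU work here.
      spw_print_information("Computing statistics for %s", table_.c_str());
    }

  private:
    const Str table_;
  };

  // After Worker::initialize() has been called:
  //   Worker::sendJob(new StatisticsJob(Str("metrics")));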
+////////////////////////////////////////////////////////////////////////////////////////////////////// +// ThreadNameGenerator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ThreadNameGenerator { +private: + + const char* prefix_; + +public: + + ThreadNameGenerator(const char* prefix) : prefix_(prefix) { + } + + Str getName() const { + static volatile uint32_t counter = 0; + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "%s(%u)", prefix_, Atomic::inc32(&counter)); + return Str(tmp); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// JobThreadFactory +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class JobThreadFactory : public MessageThreadFactory, private ThreadNameGenerator { +public: + + JobThreadFactory(const char* prefix) : ThreadNameGenerator(prefix) { + } + + MessageThread* createThread(Queue& queue) const override { + return new JobThread(getName().c_str(), queue); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Worker thread pool +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef ThreadPool JobThreadPool; + +class Worker { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); + static Queue& getQueue() { + return *threadPool_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Flush +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Flush { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + static volatile uint32_t maxThreads_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); + static Queue& getQueue() { + return *threadPool_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Writer thread pool +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class Writer { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ApiWorker +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ApiWorker { +private: + + static const JobThreadFactory factory_; + static JobThreadPool* threadPool_; + +public: + + static void initialize() _THROW_(SparrowException); + static void shutdown(); + static void sendJob(Job* job); +}; + +} + +#endif /* #ifndef _engine_thread_h_ */ diff --git a/storage/sparrow/engine/transient.cc b/storage/sparrow/engine/transient.cc new file mode 100644 index 000000000000..3b8d9991b145 --- /dev/null +++ b/storage/sparrow/engine/transient.cc @@ -0,0 +1,1800 @@ +/* + Transient partition. 
+*/ + +#include "../handler/hasparrow.h" +#include "transient.h" +#include "master.h" +#include "purge.h" +#include "fileutil.h" +#include "internalapi.h" +#include "persistent.h" +#include "flush.h" +#include "coalescing.h" +#include "../functions/ipaddress.h" +#include "../dns/dnscache.h" + +#include "../engine/log.h" + +namespace Sparrow { + +using namespace IvFunctions; + +// Instantiate template classes. +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; +template class ColumnAccessorSimple; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TransientPartition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Condition to know if insertion is possible. +Lock TransientPartition::condLock_(true, "TransientPartition::condLock_"); +Cond TransientPartition::insertCond_(true, TransientPartition::condLock_, "TransientPartition::insertCond_"); + +// To wait until flushs are completed. +volatile uint32_t TransientPartition::flushs_ = 0; +Lock TransientPartition::PartLock_(true, "TransientPartition::PartLock_"); +uint64_t TransientPartition::sizeFlushing_ = 0; // Total size of partitions that have been forced to be flushed +SYSvector TransientPartition::AllTransPartitions_; // List of all transient partitions, first one is oldest, last is newest. +SYSvector TransientPartition::FlushingPartitions_; // List of partitions that have been flushed before the normal timeout. + +Cond TransientPartition::flushCond_(true, TransientPartition::condLock_, "TransientPartition::flushCond_"); + +// Empty time period (returned when this transient partition is empty). +TimePeriod TransientPartition::voidPeriod_ = TimePeriod(static_cast(0)).makeIntersection(static_cast(1)); + +TransientPartition::TransientPartition(Master* master, const uint64_t serial) + : Partition(serial, 0, 0, master->getIndexAlterSerial(), master->getColumnAlterSerial()), + master_(master), dnsConfiguration_(master->getDnsConfiguration()), accessors_(0), timestampAccessor_(0), + minTimestamp_(ULLONG_MAX), maxTimestamp_(0), dnsIdAccessor_(0), + hasString_(false), lock_(false, TransientPartition::getName(master, serial, "lock_").c_str()), timestamp_(Scheduler::now()), + records_(0), jobCounter_(0), errors_(0), flush_tries_(0), done_(false), dnsPending_(0), flush_(false), dataSize_(0), indexSize_(0), + stringOffset_(0), stringSize_(0), flushTimestamp_(0), size_(0) { + SPARROW_ENTER("TransientPartition::TransientPartition"); + DBUG_PRINT("sparrow_transient", ("Creating transient partition %s.%s.%llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(serial))); + + // Initialize column ids. 
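+  // The two loops below skip dropped columns and indexes: they count the
+  // surviving columns, remember whether any of them is a string (hasString_),
+  // and for each live index store its column ids together with a flag telling
+  // whether the index key involves a string column (indexStringFlags_).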
+ const Columns& columns = master_->getColumns(); + uint32_t i; + uint32_t pos = 0; + for (i = 0; i < columns.length(); ++i) { + const Column& column = columns[i]; + if (!column.isDropped()) { + if (column.isString()) { + hasString_ = true; + } + ++pos; + } + } + const uint32_t validColumns = pos; + const Indexes& indexes = master_->getIndexes(); + uint32_t nbIndexes = indexes.length(); + indexStringFlags_ = IndexStringFlags(nbIndexes); + for (i = 0; i < nbIndexes; ++i) { + const Index& index = indexes[i]; + if (!index.isDropped()) { + indexIds_.append(i); + ColumnIds columnIds(index.getColumnIds()); + bool hasString = false; + for (uint32_t j = 0; j < columnIds.length(); ++j) { + const uint32_t id = columnIds[j]; + if (columns[id].isString()) { + hasString = true; + break; + } + } + indexStringFlags_.append(hasString); + columnIds_.append(columnIds); + } + } + + // Create columns. + accessors_.resize(validColumns); + dnsIpAccessors_.resize(validColumns); + for (i = 0; i < validColumns; ++i) { + dnsIpAccessors_.append(0); + } + pos = 0; + for (i = 0; i < columns.length(); ++i) { + const Column& column = columns[i]; + if (column.isDropped()) { + continue; + } + ColumnAccessor* accessor = 0; + switch (column.getType()) { + case COL_STRING: { + ColumnAccessorBin* binAccessor = new ColumnAccessorBin(i, column, binBuffer_); + if (column.isFlagSet(COL_IP_LOOKUP)) { + dnsLookupAccessors_.append(binAccessor); + } + accessor = binAccessor; + break; + } + case COL_BLOB: { + ColumnAccessorBin* binAccessor = new ColumnAccessorBin(i, column, binBuffer_); + if (column.isFlagSet(COL_IP_ADDRESS)) { + dnsIpAccessors_[pos] = binAccessor; + } + accessor = binAccessor; + break; + } + case COL_BYTE: { + if (column.isFlagSet(COL_UNSIGNED)) { + accessor = new ColumnAccessorSimple(i, column); + } else { + accessor = new ColumnAccessorSimple(i, column); + } + break; + } + case COL_SHORT: { + if (column.isFlagSet(COL_UNSIGNED)) { + accessor = new ColumnAccessorSimple(i, column); + } else { + accessor = new ColumnAccessorSimple(i, column); + } + break; + } + case COL_DOUBLE: accessor = new ColumnAccessorSimple(i, column); break; + case COL_INT: { + if (column.isFlagSet(COL_UNSIGNED)) { + accessor = new ColumnAccessorSimple(i, column); + } else { + accessor = new ColumnAccessorSimple(i, column); + } + if (column.isFlagSet(COL_DNS_IDENTIFIER)) { + dnsIdAccessor_ = accessor; + } + break; + } + case COL_LONG: { + if (column.isFlagSet(COL_UNSIGNED)) { + accessor = new ColumnAccessorSimple(i, column); + } else { + accessor = new ColumnAccessorSimple(i, column); + } + break; + } + case COL_TIMESTAMP: { + ColumnAccessorSimple* simpleAccessor = new ColumnAccessorSimple(i, column); + if (pos == 0) { + // First column is the timestamp. 
+ timestampAccessor_ = simpleAccessor; + } + accessor = simpleAccessor; + break; + } + default: { + break; + } + } + accessors_.append(accessor); + ++pos; + } + + // Reference this partition in the list of transient partitions + addPartition(this); +} + +// STATIC +Str TransientPartition::getName(Master* master, const uint64_t serial, const char* name) { + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "TransientPartition(%s.%s.%llu)::%s", + master->getDatabase().c_str(), master->getTable().c_str(), static_cast(serial), name); + return Str(tmp); +} + +TransientPartition::~TransientPartition() { + SPARROW_ENTER("TransientPartition::~TransientPartition"); + DBUG_PRINT("sparrow_transient", ("Destroying transient partition %s.%s.%llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + flushedPartition(this); + clear(); +} + +void TransientPartition::clear() { + SPARROW_ENTER("TransientPartition::clear"); + SizeGuard sizeGuard(*this); + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->clear(); + } + binBuffer_.clear(); + accessors_.clearAndDestroy(); +} + +PartitionSnapshot* TransientPartition::snapshot() { + SPARROW_ENTER("TransientPartition::snapshot"); + ReadGuard guard(lock_); + uint32_t records = getRecords(); + DBUG_PRINT("sparrow_transient", ("Snapshoting transient partition %s.%s.%llu: %u records", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), records)); + if (records == 0) { + return 0; + } else { + return new PartitionSnapshot(this, records); + } +} + +// If necessary, wait until there is some room in the tuple buffer. +// STATIC +bool TransientPartition::waitForRoom(volatile bool& aborting) { + SPARROW_ENTER("TransientPartition::waitForRoom"); + Guard guard(TransientPartition::condLock_); + uint64_t threshold = sparrow_max_tuple_buffer_size; + uint64_t thresholdLower = (sparrow_max_tuple_buffer_size * sparrow_tuple_buffer_threshold) / 100; + + uint64_t waitStartTime = my_micro_time(); + bool hasWaited = false; + bool errMsg = false; + + for(;;) { + uint64_t tupleBufferSize = SparrowStatus::get().tupleBufferSize_; + if (tupleBufferSize >= threshold) { + if (hasWaited) { + uint64_t waitEndTime = my_micro_time(); + uint64_t waitTime = (waitEndTime - waitStartTime)/1000; + if (waitTime > 60000) { // 1 minute threshold + // Make checks and force insertion + Guard guard(PartLock_); + if (!errMsg) { + spw_print_information("[waitForRoom] Waited for room too long: size %llu, threshold %llu, size flushing %llu, nb flushing %u, nb transient %u", + static_cast(tupleBufferSize), static_cast(threshold), static_cast(sizeFlushing_), FlushingPartitions_.entries(), AllTransPartitions_.entries()); + errMsg = true; + } + if (FlushingPartitions_.isEmpty() && AllTransPartitions_.isEmpty()) { + spw_print_information("[waitForRoom] Nothing to flush! Tuple buffer size is wrong. 
Resetting"); + sizeFlushing_ = 0; + SparrowStatus::get().tupleBufferSize_ = 0; + } + return true; + } + } + + TransientPartition::insertCond_.wait(100, true); + if (aborting) { + return false; + } + threshold = thresholdLower; + hasWaited = true; + } else { + if (hasWaited) { + DBUG_PRINT("sparrow_transient", ("[waitForRoom] There's enough room: %llu < %llu.", static_cast(tupleBufferSize), static_cast(threshold))); + } + break; + } + } + if (hasWaited) { + uint64_t waitEndTime = my_micro_time(); + uint64_t waitTime = (waitEndTime - waitStartTime)/1000; + Atomic::add64(&SparrowStatus::get().flushWait_, waitTime); + } + return true; +} + +// Unmarshalls incoming buffer using columns and fill accessors. +bool TransientPartition::insert(ByteBuffer& buffer, const uint32_t rows, uint64_t& last_timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("TransientPartition::insert"); +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); +#endif + DBUG_PRINT("sparrow_transient", ("Inserting %u rows into transient partition %s.%s.%llu ? Trying to take lock", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + + WriteGuard guard(lock_); + + // Check if partition can accept more data. + if (done_) { + DBUG_PRINT("sparrow_transient", ("Transient partition %llu done ==> we need another one", static_cast(getSerial()))); + return false; + } + DBUG_PRINT("sparrow_transient", ("Inserting %u rows into transient partition %s.%s.%llu", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + uint32_t saved = timestampAccessor_->length(); + uint64_t savedPosition = buffer.position(); + int64_t autoInc = master_->getAutoInc(); + int64_t savedAutoInc = autoInc; + const uint32_t initial = records_; + uint32_t nbRows = 0; + DBUG_PRINT("sparrow_transient", ("Initially: timestampAccessor_ %u, buffer.position %llu, master_->getAutoInc %lld, records_ %u", + saved, static_cast(savedPosition), static_cast(autoInc), records_)); + bool resolve = false; + uint64_t dnsTimestamp = 0; + const uint64_t coalescingPeriod = master_->getCoalescingPeriod(); + uint64_t low = coalescingPeriod == 0 ? 0 : (minTimestamp_ == ULLONG_MAX ? 0 : (minTimestamp_ - (minTimestamp_ % coalescingPeriod))); + uint64_t up = low == 0 ? 0 : (low + coalescingPeriod); +#ifndef NDEBUG + const Str coalesc = Str::fromDuration(coalescingPeriod); + const Str low_ts = Str::fromTimestamp(low); + const Str up_ts = Str::fromTimestamp(up); + DBUG_PRINT("sparrow_transient", ("coalesc p %s, low %s, up %s", coalesc.c_str(), low_ts.c_str(), up_ts.c_str())); +#endif + try { + SizeGuard sizeGuard(*this); + DataReader reader(buffer, binBuffer_); + while (!reader.end()) { + saved = timestampAccessor_->length(); + savedPosition = buffer.position(); + savedAutoInc = autoInc; + for (uint32_t i = 0; i < accessors_.length(); ++i) { + ColumnAccessor* accessor = accessors_[i]; + const Column& column = accessor->getColumn(); + + // Reverse DNS column: insert NULL for now (see dnsLookup()). 
+ if (column.isFlagSet(COL_IP_LOOKUP)) { + accessor->insertNull(); + resolve = true; + } else if (column.isFlagSet(COL_AUTO_INC)) { + autoInc = accessor->insertAutoInc(autoInc); + } else { + uint8_t isNull = 0; + if (accessor->isNullable()) { + reader >> isNull; + } + if (isNull) { + accessor->insertNull(); + } else { + accessor->insertValue(reader); + } + } + } + nbRows++; + const uint64_t timestamp = timestampAccessor_->last(); + if (timestamp == 0) { + throw SparrowException::create(false, "Cannot insert row with zero timestamp"); + } + // Make checks in case of data corruption + { + uint64_t t = timestamp/1000; // in seconds + t /= (3600ULL*24*364); // Number of years since 1970 + if (t < 30 || t > 60) { + throw SparrowException::create(false, "Cannot insert data: wrong timestamp."); + } + } + if (coalescingPeriod != 0) { + if (low == 0) { + low = timestamp - (timestamp % coalescingPeriod); + up = low + coalescingPeriod; + } else if (timestamp < low || timestamp >= up) { +#ifndef NDEBUG + const Str ts = Str::fromTimestamp(timestamp); + const Str low_ts = Str::fromTimestamp(low); + const Str up_ts = Str::fromTimestamp(up); + DBUG_PRINT("sparrow_transient", ("ts %s (at rows %u), low %s, up %s", ts.c_str(), nbRows, low_ts.c_str(), up_ts.c_str())); +#endif + + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->shrink(saved); + } + if (saved < records_ || !resolve) { + records_ = saved; + } + buffer.position(savedPosition); + master_->setAutoInc(savedAutoInc); + last_timestamp = timestamp; +#ifndef NDEBUG + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Finished inserting in transient partition %llu: %s, %s", + static_cast(getSerial()), min_ts.c_str(), max_ts.c_str())); +#endif + scheduleFlush(dnsTimestamp); + return false; + } + } + minTimestamp_ = std::min(minTimestamp_, timestamp); + maxTimestamp_ = std::max(maxTimestamp_, timestamp); + } + + // Lookup IP addresses if necessary. + if (resolve) { + dnsTimestamp = dnsLookup(initial, false); + if (dnsTimestamp != 0) { +#ifndef NDEBUG + const Str sTimestamp = Str::fromTimestamp(dnsTimestamp); + DBUG_PRINT("sparrow_transient", ("Scheduling DNS update of transient partition %s.%s.%llu at %s", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), sTimestamp.c_str())); +#endif + Scheduler::addTask(new DnsTask(master_.get(), getSerial()), dnsTimestamp); + } + } else { + records_ = timestampAccessor_->length(); + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + const Str sTimestamp = Str::fromTimestamp(dnsTimestamp); + DBUG_PRINT("sparrow_transient", ("Inserted %u rows into transient partition %s.%s.%llu in %s", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); + { + DBUG_PRINT("sparrow_transient", ("NOW: timestampAccessor_ %u, buffer.position %llu, master_->getAutoInc %lld, records_ %u", + saved, static_cast(savedPosition), static_cast(autoInc), records_)); + + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Transient partition %llu has been added %u rows, min, max ts %s, %s", + static_cast(getSerial()), nbRows, min_ts.c_str(), max_ts.c_str())); + } + +#endif + } catch(const SparrowException& e) { + // Rollback to last good position. 
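+    // Shrink every column accessor back to the last complete row, restore the
+    // reader position and the auto-increment counter, schedule any pending
+    // flush, then re-throw the exception to the caller.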
+#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_transient", ("Rollback transient partition %s.%s.%llu to %u rows: %s (insertion done in %s)", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), saved, e.getText(), duration.c_str())); +#endif + SizeGuard sizeGuard(*this); + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->shrink(saved); + } + if (saved < records_ || !resolve) { + records_ = saved; + } + buffer.position(savedPosition); + master_->setAutoInc(savedAutoInc); + scheduleFlush(dnsTimestamp); + throw e; + } + scheduleFlush(dnsTimestamp); + master_->setAutoInc(autoInc); + return true; +} + + +// Unmarshalls incoming buffer using only a selection of columns and fill accessors. +bool TransientPartition::insert(ByteBuffer& buffer, const uint32_t rows, const Names& columns, const ColumnIds& colIds, uint64_t& last_timestamp) _THROW_(SparrowException) { + SPARROW_ENTER("TransientPartition::insert"); +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); +#endif + DBUG_PRINT("sparrow_transient", ("Inserting %u rows on %u columns into transient partition %s.%s.%llu ? Trying to take lock", + rows, columns.entries(), master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + + WriteGuard guard(lock_); + + // Check if partition can accept more data. + if (done_) { + DBUG_PRINT("sparrow_transient", ("Transient partition %llu done ==> we need another one", static_cast(getSerial()))); + return false; + } + DBUG_PRINT("sparrow_transient", ("Inserting %u rows into transient partition %s.%s.%llu", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + + // Building the indirection table between each referenced column and the corresponding column accessor. + Indirector accessorPos(colIds.length()); + for (uint32_t i=0; igetColumnId() == id) { + if (accessorPos.contains(j)) { + throw SparrowException::create(false, "Failed to insert %u rows into transient partition %s.%s.%llu because the column %s is referenced twice.", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), columns[i].c_str()); + } + accessorPos[i] = j; + break; + } + } + if (j == accessors_.length()) { + throw SparrowException::create(false, "Failed to insert %u rows into transient partition %s.%s.%llu because no accessor was found for column %s", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), columns[i].c_str()); + } + } + //assert( accessorPos.length() == colIds.length() ); + + Indirector accessorMissing(accessors_.length() - accessorPos.length()); + uint j = 0; + for (uint32_t i=0; ilength(); + uint64_t savedPosition = buffer.position(); + int64_t autoInc = master_->getAutoInc(); + int64_t savedAutoInc = autoInc; + const uint32_t initial = records_; + uint32_t nbRows = 0; + DBUG_PRINT("sparrow_transient", ("Initially: timestampAccessor_ %u, buffer.position %llu, master_->getAutoInc %lld, records_ %u", + saved, static_cast(savedPosition), static_cast(autoInc), records_)); + bool resolve = false; + uint64_t dnsTimestamp = 0; + const uint64_t coalescingPeriod = master_->getCoalescingPeriod(); + uint64_t low = coalescingPeriod == 0 ? 0 : (minTimestamp_ == ULLONG_MAX ? 0 : (minTimestamp_ - (minTimestamp_ % coalescingPeriod))); + uint64_t up = low == 0 ? 
0 : (low + coalescingPeriod); +#ifndef NDEBUG + const Str coalesc = Str::fromDuration(coalescingPeriod); + const Str low_ts = Str::fromTimestamp(low); + const Str up_ts = Str::fromTimestamp(up); + DBUG_PRINT("sparrow_transient", ("coalesc p %s, low %s, up %s", coalesc.c_str(), low_ts.c_str(), up_ts.c_str())); +#endif + try { + SizeGuard sizeGuard(*this); + DataReader reader(buffer, binBuffer_); + while (!reader.end()) { + saved = timestampAccessor_->length(); + savedPosition = buffer.position(); + savedAutoInc = autoInc; + for (uint32_t i=0; igetColumn(); + assert(column.isFlagSet(COL_IP_LOOKUP) == false); + assert(column.isFlagSet(COL_AUTO_INC) == false); + + uint8_t isNull = 0; + if (accessor->isNullable()) { + reader >> isNull; + } + if (isNull) { + accessor->insertNull(); + } else { + accessor->insertValue(reader); + } + } + for (uint32_t i=0; igetColumn(); + + // Reverse DNS column: insert NULL for now (see dnsLookup()). + if (column.isFlagSet(COL_IP_LOOKUP)) { + accessor->insertNull(); + resolve = true; + } else if (column.isFlagSet(COL_AUTO_INC)) { + autoInc = accessor->insertAutoInc(autoInc); + } else if (accessor->isNullable()) { + accessor->insertNull(); + } else { + accessor->insertDummy(true); + } + } + nbRows++; + const uint64_t timestamp = timestampAccessor_->last(); + if (timestamp == 0) { + throw SparrowException::create(false, "Cannot insert row with zero timestamp"); + } + // Make checks in case of data corruption + { + uint64_t t = timestamp/1000; // in seconds + t /= (3600ULL*24*364); // Number of years since 1970 + if (t < 30 || t > 60) { + throw SparrowException::create(false, "Cannot insert data: wrong timestamp."); + } + } + if (coalescingPeriod != 0) { + if (low == 0) { + low = timestamp - (timestamp % coalescingPeriod); + up = low + coalescingPeriod; + } else if (timestamp < low || timestamp >= up) { +#ifndef NDEBUG + const Str ts = Str::fromTimestamp(timestamp); + const Str low_ts = Str::fromTimestamp(low); + const Str up_ts = Str::fromTimestamp(up); + DBUG_PRINT("sparrow_transient", ("ts %s (at rows %u), low %s, up %s", ts.c_str(), nbRows, low_ts.c_str(), up_ts.c_str())); +#endif + + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->shrink(saved); + } + if (saved < records_ || !resolve) { + records_ = saved; + } + buffer.position(savedPosition); + master_->setAutoInc(savedAutoInc); + last_timestamp = timestamp; +#ifndef NDEBUG + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Finished inserting in transient partition %llu: %s, %s", + static_cast(getSerial()), min_ts.c_str(), max_ts.c_str())); +#endif + scheduleFlush(dnsTimestamp); + return false; + } + } + minTimestamp_ = std::min(minTimestamp_, timestamp); + maxTimestamp_ = std::max(maxTimestamp_, timestamp); + } + + // Lookup IP addresses if necessary. 
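+ // dnsLookup() resolves what it can from the DNS cache and returns 0 when resolution is
+ // complete, or the expected completion timestamp when lookups are still pending; in the
+ // latter case a DnsTask is scheduled to fill the remaining rows asynchronously.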
+ if (resolve) { + dnsTimestamp = dnsLookup(initial, false); + if (dnsTimestamp != 0) { +#ifndef NDEBUG + const Str sTimestamp = Str::fromTimestamp(dnsTimestamp); + DBUG_PRINT("sparrow_transient", ("Scheduling DNS update of transient partition %s.%s.%llu at %s", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), sTimestamp.c_str())); +#endif + Scheduler::addTask(new DnsTask(master_.get(), getSerial()), dnsTimestamp); + } + } else { + records_ = timestampAccessor_->length(); + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + const Str sTimestamp = Str::fromTimestamp(dnsTimestamp); + DBUG_PRINT("sparrow_transient", ("Inserted %u rows into transient partition %s.%s.%llu in %s", + rows, master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); + { + DBUG_PRINT("sparrow_transient", ("NOW: timestampAccessor_ %u, buffer.position %llu, master_->getAutoInc %lld, records_ %u", + saved, static_cast(savedPosition), static_cast(autoInc), records_)); + + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Transient partition %llu has been added %u rows, min, max ts %s, %s", + static_cast(getSerial()), nbRows, min_ts.c_str(), max_ts.c_str())); + } + +#endif + } catch(const SparrowException& e) { + // Rollback to last good position. +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_transient", ("Rollback transient partition %s.%s.%llu to %u rows: %s (insertion done in %s)", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), saved, e.getText(), duration.c_str())); +#endif + SizeGuard sizeGuard(*this); + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->shrink(saved); + } + if (saved < records_ || !resolve) { + records_ = saved; + } + buffer.position(savedPosition); + master_->setAutoInc(savedAutoInc); + scheduleFlush(dnsTimestamp); + throw e; + } + scheduleFlush(dnsTimestamp); + master_->setAutoInc(autoInc); + return true; +} + + +void TransientPartition::scheduleFlush(const uint64_t dnsTimestamp) { + if (timestampAccessor_->length() == 0) { + return; + } + if (dnsTimestamp > flushTimestamp_) { + // If we need more time to complete DNS resolution, change flush timestamp + // but forbid adding more data. + done_ = true; +#ifndef NDEBUG + const Str sTimestamp = Str::fromTimestamp(dnsTimestamp); + DBUG_PRINT("sparrow_transient", ("Delaying flush of transient partition %s.%s.%llu at %s for DNS", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), sTimestamp.c_str())); +#endif + flushTimestamp_ = dnsTimestamp; + Scheduler::addTask(new FlushTask(master_.get(), getSerial()), flushTimestamp_); + } else if (flushTimestamp_ == 0) { + // Schedule timeout flush. + flushTimestamp_ = timestamp_ + sparrow_flush_interval * 1000; +#ifndef NDEBUG + const Str sTimestamp = Str::fromTimestamp(flushTimestamp_); + DBUG_PRINT("sparrow_transient", ("Scheduling flush of transient partition %s.%s.%llu at %s", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), sTimestamp.c_str())); +#endif + Scheduler::addTask(new FlushTask(master_.get(), getSerial()), flushTimestamp_); + } else if (done_) { + // Partition is full; try to flush now. + flushTimestamp_ = dnsTimestamp == 0 ? 
Scheduler::now() : dnsTimestamp; +#ifndef NDEBUG + const Str sTimestamp = Str::fromTimestamp(flushTimestamp_); + DBUG_PRINT("sparrow_transient", ("Scheduling flush of full transient partition %s.%s.%llu at %s", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), sTimestamp.c_str())); +#endif + Scheduler::addTask(new FlushTask(master_.get(), getSerial()), flushTimestamp_); + } +} + +void TransientPartition::updateDnsConfiguration(DnsConfiguration* dnsConfiguration) { + SPARROW_ENTER("TransientPartition::updateDnsConfiguration"); + WriteGuard guard(lock_); + dnsConfiguration_ = dnsConfiguration; +} + +// DNS lookup is performed through several passes: +// - One pass each time new data are inserted. In this case, start gives +// the starting row for filling lookup columns. This pass gets lookups already in the cache. +// - One last pass after the DNS timeout has expired, and before this partition +// is flushed to disk and mutated. In this case, start is 0. This last pass is necessary +// to fill all cache-miss lookups. +// Returns 0 if DNS resolution is complete, or the timestamp of expected completion +// if there are pending resolutions. +uint64_t TransientPartition::dnsLookup(const uint32_t start, const bool lastPass) { + SPARROW_ENTER("TransientPartition::dnsLookup"); + +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); +#endif + uint64_t now = std::time(nullptr); + uint64_t mnow = my_micro_time(); + bool pending = false; + const uint32_t end = timestampAccessor_->length(); + DnsConfiguration* configuration = dnsConfiguration_.get(); + uint32_t records = UINT_MAX; + for (uint32_t i = 0; i < dnsLookupAccessors_.length(); ++i) { + ColumnAccessorBin* dnsLookupAccessor = dnsLookupAccessors_[i]; + ColumnAccessorBin* dnsIpAccessor = dnsIpAccessors_[dnsLookupAccessor->getColumn().getInfo()]; + uint32_t limit = records_; + for (uint32_t row = start; row < end; ++row) { + // If the IP address is valid and the corresponding lookup is empty, try to resolve. + if (dnsLookupAccessor->isNull(row) && !dnsIpAccessor->isNull(row)) { + const BinString& string = *(*dnsIpAccessor)[row]; // The IP address to resolve into a host name. + bool setIpAsString = false; + if (configuration == 0) { + // No DNS server: set the IP address as a string. + setIpAsString = true; + } else { + // If the DNS identifier is not set or NULL, use wildcard value (-1). + const int id = (dnsIdAccessor_ == 0 || dnsIdAccessor_->isNull(row)) ? -1 : static_cast(dnsIdAccessor_->getValue(row)); + DnsCacheEntry* entry; + { + Guard guard(configuration->getLock()); + entry = configuration->doResolve(now, mnow, id, string.getData(), string.getLength()); + } + if (entry == 0) { + if (lastPass) { + // The DNS worker is late; we should have an entry now. + // Set the IP address as a string. 
+ setIpAsString = true; + } else { + pending = true; + } + } else { + const Str& name = entry->getName(); + dnsLookupAccessor->insertValue(row, name.c_str(), name.length()); + } + } + if (setIpAsString) { + char buffer[128]; + IpAddress address(string.getData(), string.getLength()); + dnsLookupAccessor->insertValue(row, buffer, address.print(buffer)); + } + } + if (!pending && row >= limit) { + limit = row + 1; + } + } + records = std::min(records, limit); + } + if (records != UINT_MAX) { + records_ = records; + } +#ifndef NDEBUG + if (end > start) { + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_transient", ("Resolved ip lookups in transient partition %s.%s.%llu for rows %u-%u%s in %s%s, records=%u", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), start, end, + lastPass ? " (last pass)" : "", duration.c_str(), pending ? " (still pending)" : "", records_)); + } +#endif + if (pending) { + return Scheduler::now() + sparrow_dns_timeout * (1 + sparrow_dns_retries); + } else { + return 0; + } +} + +void TransientPartition::dnsUpdate() { + SPARROW_ENTER("TransientPartition::dnsUpdate"); + WriteGuard guard(lock_, false, true); + if (!flush_) { + SizeGuard sizeGuard(*this); + dnsLookup(records_, false); + } +} + +// Sends jobs to compute indexes and, in turn, write generated data. +bool TransientPartition::flush(const uint64_t timestamp, bool master_lock_taken, bool force) { + SPARROW_ENTER("TransientPartition::flush"); + + TimePeriod period; + uint64_t serial; + uint32_t columnAlterSerial; + uint32_t indexAlterSerial; + ColumnIds emptyColumnsIds; + { + WriteGuard guard(lock_, false, true); + + if ( flush_ || (timestamp != flushTimestamp_ && !force) ) { + return false; + } + + done_ = true; + + { + SizeGuard sizeGuard(*this); + dnsLookup(records_, true); + } + flushingPartition(this); + + if ( getRecords() == 0 ) { + return false; + } + + period = getPeriodNoLock(); + serial = getSerial(); + columnAlterSerial = getColumnAlterSerial(); + indexAlterSerial = getIndexAlterSerial(); + if (sparrow_column_optimisation) { + refreshEmptyColumns(); + emptyColumnsIds = getEmptyColumns(); + } + +#ifndef NDEBUG + const Str trs_period = Str::fromTimePeriod(period); + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Flush transient partition %llu, nb recs %u, %s [%s, %s]", + static_cast(getSerial()), records_, trs_period.c_str(), min_ts.c_str(), max_ts.c_str() )); + // UGLY ! 
+ char msg[1024] = ""; + for (uint i=0; igetLock(), false, !master_lock_taken); + mainPartition = master_->findMainPartition( serial, period, columnAlterSerial, indexAlterSerial, emptyColumnsIds ); + DBUG_PRINT("sparrow_transient", ("Main partition for transient part %llu would be %llu", static_cast(getSerial()), + static_cast(mainPartition->getSerial()))); + } + + WriteGuard guard(lock_); + if ( !flush_ ) { + // If column or index alteration has taken place while we had released the lock, get the new main partition + if ( columnAlterSerial != getColumnAlterSerial() || indexAlterSerial != getIndexAlterSerial() ) + { + period = getPeriodNoLock(); + serial = getSerial(); + columnAlterSerial = getColumnAlterSerial(); + indexAlterSerial = getIndexAlterSerial(); + +#ifndef NDEBUG + const Str trs_period = Str::fromTimePeriod(period); + const Str min_ts = Str::fromTimestamp(minTimestamp_); + const Str max_ts = Str::fromTimestamp(maxTimestamp_); + DBUG_PRINT("sparrow_transient", ("Column or Index alteration requires new main partition for transient %llu", static_cast(getSerial()) )); +#endif + + guard.release(); + { + ReadGuard masterGuard(master_->getLock(), false, !master_lock_taken); + mainPartition = master_->findMainPartition( serial, period, columnAlterSerial, indexAlterSerial, emptyColumnsIds ); + DBUG_PRINT("sparrow_transient", ("New Main partition for transient part %llu would be %llu", static_cast(getSerial()), static_cast(mainPartition->getSerial()))); + } + guard.acquire(); + } + setEmptyColumns( mainPartition->getSkippedColumns() ); + + doFlush( mainPartition ); + } + + return true; +} + +void TransientPartition::refreshEmptyColumns() { + emptyColumnsIds_.clear(); + for (uint32_t i=0; igetColumn().isFlagSet(COL_IP_LOOKUP)) + continue; + if (accessors_[i]->areAllNulls()) { + emptyColumnsIds_.append(accessors_[i]->getColumnId()); + } + } +} + +// Returns true if the partition is being flushed +bool TransientPartition::forceFlush( bool master_lock_taken ) { + SPARROW_ENTER("TransientPartition::forceFlush"); + return flush(flushTimestamp_, master_lock_taken, true); +} + +void TransientPartition::doFlush( PersistentPartitionGuard mainPartition ) { + SPARROW_ENTER("TransientPartition::doFlush"); + { + Guard flushGuard(TransientPartition::condLock_); + flushs_++; + } + + // DEBUG BPL - is already set in method flush() + assert(done_ == true); + if (!done_) { + done_ = true; + spw_print_information("[DEBUG] Forcing done_ to true on flushed partition %s.%s.%llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial())); + } + + dataSerial_ = mainPartition->getSerial(); + + // Set file system for index files. + const bool coalescing = sparrow_coalescing && master_->getCoalescingPeriod() != 0; + setFilesystem(FileUtil::chooseFilesystem(coalescing)); + + // Prepare flush jobs. + assert(getRecords() > 0); // This partition cannot be empty. + assert(jobCounter_ == 0 && errors_ == 0); + DBUG_PRINT("sparrow_transient", ("Sending flush jobs for transient partition %s.%s.%llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + FlushJob* job = new FlushJob(this, mainPartition, columnIds_.length() + 1, 1); + if (hasString_) { + // Send string job, which will in turn fire string-dependent jobs. + job->getWorkerJobs().append(new StringJob(this, mainPartition)); + } else { + // Write data file. + job->getWriterJobs().append(new WriteJob(this, mainPartition, DATA_FILE, 0)); + } + + // Compute and write indexes not using strings. 
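+ // Indexes involving a string column can only be built once the string file has been
+ // written, so flushStrings() fires those jobs later; only string-free indexes start here.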
+ for (uint32_t i = 0; i < columnIds_.length(); ++i) { + if (!indexStringFlags_[i]) { + job->getWorkerJobs().append(new IndexJob(this, mainPartition, i)); + } + } + Flush::sendJob(job); + flush_ = true; +} + +// Optimize strings using existing string file, if any. +void TransientPartition::flushStrings(PersistentPartitionGuard mainPartition) _THROW_(SparrowException) { + SPARROW_ENTER("TransientPartition::flushStrings"); + FileSection stringsSection; + const bool newFile = mainPartition->getSerial() > getSerial(); + ReadGuard guard(lock_); + if (!newFile) { + // Optimize strings from existing main partition. + { + PartitionReader dataReader(*mainPartition, DATA_FILE, BlockCacheHint::smallForward0_); + const FileHeaderBase& header = dataReader.getHeader(); + stringsSection = header.getStringsSection(); + } + PartitionReader reader(*mainPartition, STRING_FILE, BlockCacheHint::largeForward0_); + reader.seek(stringsSection.getOffset()); + DBUG_PRINT("sparrow_transient", ("Optimizing strings for transient partition %s.%s.%llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + binBuffer_.optimize(reader, stringsSection.getSize()); + } + + // Flush strings. + char filename[FN_REFLEN]; + mainPartition->getFileName(STRING_FILE, filename); + const PartitionFile partitionFile(*mainPartition, STRING_FILE); + const SimpleWriteCacheHint writeHint(partitionFile, 3); + { + FileWriter writer(filename, FILE_TYPE_STRING, newFile ? FILE_MODE_CREATE : FILE_MODE_UPDATE, &writeHint, + stringsSection.getOffset() + stringsSection.getSize()); + const uint64_t save = writer.getFileSize(); + stringOffset_ = writer.getFileOffset(); + stringSize_ = binBuffer_.flush(writer); + if (writer.getFileSize() == 0) { + writer << "SPARROW"; + } + writer.write(); + Atomic::add64(&dataSize_, writer.getFileSize() - save); + if (stringSize_ + stringsSection.getSize() <= master_->getStringOptimization()) { + stringOffset_ = stringsSection.getOffset(); + stringSize_ += stringsSection.getSize(); + } + } + DBUG_PRINT("sparrow_transient", ("Written strings for transient partition %s.%s.%llu to main partition %llu", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), static_cast(mainPartition->getSerial()))); + + // We're going to trigger new jobs: jobs that needed that the SPS file was up to date first. So first, increment the jobCounter_ accordingly. + for (uint32_t i = 0; i < columnIds_.length(); ++i) { + if (indexStringFlags_[i]) { + incJobCounter(); + } + } + + // Write data file. + Writer::sendJob(new WriteJob(this, mainPartition, DATA_FILE, 0)); + + // Compute and write indexes using strings. + for (uint32_t i = 0; i < columnIds_.length(); ++i) { + if (indexStringFlags_[i]) { + Worker::sendJob(new IndexJob(this, mainPartition, i)); + } + } +} + +template class Sort; + +// Computes the given index. +void TransientPartition::compute(PersistentPartitionGuard mainPartition, const uint32_t id) { + SPARROW_ENTER("TransientPartition::compute"); + ReadGuard guard(lock_); + const uint32_t rows = getRecords(); + +#ifndef NDEBUG + uint64_t tstart = my_micro_time(); +#endif + + // Sort index indirector. 
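+ // The index is materialized as an indirection table: row numbers 0..rows-1 are sorted with
+ // the comparator of the indexed columns, and the result is written out by a WriteJob below.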
+ Indirector* indirector = new Indirector(); + for (uint32_t i = 0; i < rows; ++i) { + indirector->append(i); + } + const RowComparator comparator(*this, columnIds_[id]); + Sort::quickSort(*indirector, comparator, 0, rows); +#ifndef NDEBUG + const uint32_t indexId = indexIds_[id]; + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + DBUG_PRINT("sparrow_transient", ("Computed index %u of transient partition %s.%s.%llu in %s", indexId, + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); +#endif + + // Send job to write generated data. + Writer::sendJob(new WriteJob(this, mainPartition, id, indirector)); +} + +// Generates and writes a given index or data file. For data files, indirector is NULL and id = DATA_FILE. +void TransientPartition::write(PersistentPartitionGuard mainPartition, const uint32_t id, const Indirector* indirector) _THROW_(SparrowException) { + SPARROW_ENTER("TransientPartition::write"); +#ifndef NDEBUG + const uint64_t tstart = my_micro_time(); +#endif + + // If id == DATA_FILE, we write the data file. + const bool isDataFile = id == DATA_FILE; + const bool newFile = mainPartition->getDataRecords() == 0; + const uint32_t fileId = isDataFile ? DATA_FILE : indexIds_[id]; + const uint32_t rows = getRecords(); + + ReadGuard guard(lock_); + + // Prepare tree construction. + TreeNodes nodes; + uint32_t nodeSize = 0; + if (!isDataFile) { + const ColumnIds& columnIds = columnIds_[id]; + ColumnPos columnPos; + getColumnPos(columnPos, columnIds); + uint32_t start = 0; + uint32_t previousRow = 0; + SYSxvector count; + for (uint32_t row = 0; row < rows; ++row) { + const uint32_t currentRow = (*indirector)[row]; + if (row == 0) { + start = row; + } else { + if (compare(columnPos, previousRow, currentRow, false) != 0) { + count.append(row - start); + start = row; + } + } + previousRow = currentRow; + } + count.append(rows - start); + const uint32_t nDistinct = count.length(); + RecordWriter treeRecordWriter(accessors_, &columnIds); + nodeSize = treeRecordWriter.getSize() + 8; // TODO row size. We need two row numbers in nodes + nodes = TreeNodes(nDistinct); + uint64_t k = 0; + start = 0; + for (uint32_t i = 0; i < nDistinct; ++i) { + const uint32_t end = start + count[i]; + if (static_cast(nDistinct) * i >= k) { + k += nDistinct; + nodes.append(TreeNode(start, end - 1)); + } + start = end; + } + + // Make sure the last node is the last element of the list. + if (nodes.last().getEnd() + 1 < rows) { + nodes.last() = TreeNode(rows - count.last(), count.last() - 1); + } + assert(nodes.length() == nDistinct); + } + + assert(getColumnAlterSerial() == mainPartition->getColumnAlterSerial() && getEmptyColumns().containsTheSame(mainPartition->getSkippedColumns()) == true); + + // Determine stored columns + ColumnIds columnsIds; + if (isDataFile) { + // Build the list of valid columns (= columns that are not empty) + for (uint32_t i=0; igetColumnId()))) { + columnsIds.append(accessors_[i]->getColumnId()); + } + } + } else { + columnsIds = columnIds_[id]; + } + + RecordWriter recordWriter(accessors_, &columnsIds); + const uint32_t recordSize = isDataFile ? recordWriter.getSize() : 4; // TODO row size. + const TimePeriod period = getPeriodNoLock(); + const uint64_t recordOffset = mainPartition->getDataRecords(); + + // Start writing file. 
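+ // A data file holds a DataFileHeader followed by fixed-size records; an index file holds an
+ // IndexFileHeader, the sorted row numbers (offset by recordOffset) and the lookup tree nodes.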
+ char filename[FN_REFLEN]; + if (isDataFile) { + mainPartition->getFileName(fileId, filename); + } else { + master_->getFileName(PersistentPartition::currentVersion_, getFilesystem(), period, fileId, getSerial(), mainPartition->getSerial(), filename); + } + const PartitionFile partitionFile(*mainPartition, fileId); + const SimpleWriteCacheHint writeHint(partitionFile, 3); + FileWriter writer(filename, isDataFile ? FILE_TYPE_DATA : FILE_TYPE_INDEX, (!isDataFile || newFile) ? FILE_MODE_CREATE : FILE_MODE_UPDATE, + isDataFile ? &writeHint : 0, isDataFile ? (DataFileHeader::size() + recordOffset * recordSize) : 0); + const uint64_t save = writer.getFileSize(); + if (isDataFile) { + if (newFile) { + const DataFileHeader header(recordSize, rows, stringOffset_, stringSize_, period.getMin(), period.getMax()); + writer << header; + } + + // Write data records. + for (uint32_t row = 0; row < rows; ++row) { + recordWriter.write(writer, row); + } + } else { + const uint32_t nNodes = nodes.length(); + const IndexFileHeader header(fileId, recordSize, rows, nodeSize, nNodes, period.getMin(), period.getMax()); + writer << header; + + // Write index records (row numbers in data file). + for (uint32_t row = 0; row < rows; ++row) { + writer << static_cast(recordOffset + (*indirector)[row]); + } + + // Write tree. + const TreeOrder& treeOrder = TreeOrder::get(nNodes); + RecordWriter treeRecordWriter(accessors_, &columnIds_[id]); + for (uint32_t i = 0; i < nNodes; ++i) { + const uint32_t inode = treeOrder.getListIndex(i, nNodes); + assert(inode < nNodes); + const TreeNode& node = nodes[inode]; + const uint32_t start = node.getStart(); + const uint32_t end = node.getEnd(); + writer << start << end; + treeRecordWriter.write(writer, (*indirector)[start]); + } + } + writer.write(); + if (isDataFile && !newFile) { + const TimePeriod mainPeriod = mainPartition->getPeriod(); + const DataFileHeader header(recordSize, mainPartition->getDataRecords() + rows, stringOffset_, stringSize_, + std::min(mainPeriod.getMin(), period.getMin()), std::max(mainPeriod.getMax(), period.getMax())); + writer.seek(0, header.size()); + writer << header; + writer.write(); + } + const uint64_t size = writer.getFileSize() - save; + if (isDataFile) { + Atomic::add64(&dataSize_, size); + } else { + Atomic::add64(&indexSize_, size); + } +#ifndef NDEBUG + const Str duration(Str::fromDuration((my_micro_time() - tstart) / 1000)); + if (fileId == DATA_FILE) { + DBUG_PRINT("sparrow_transient", ("Written data of transient partition %s.%s.%llu to main partition %llu in %s", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), static_cast(mainPartition->getSerial()), duration.c_str())); + } else { + DBUG_PRINT("sparrow_transient", ("Written index %u of partition %s.%s.%llu in %s", fileId, + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()), duration.c_str())); + } +#endif +} + +void TransientPartition::endFlush(PersistentPartitionGuard mainPartition) { + if (decJobCounter()) { + try { + bool mutated = mutate(mainPartition); + getMaster()->allowUpdate(); + getMaster()->endFlush(mainPartition->getSerial()); + if (!mutated) { + resetFlush(); + flush(flushTimestamp_); + } + } catch(const SparrowException& e) { + getMaster()->allowUpdate(); + getMaster()->endFlush(mainPartition->getSerial()); + throw; + } + } +} + +bool TransientPartition::mutate(PersistentPartitionGuard mainPartition) _THROW_(SparrowException) { + SPARROW_ENTER("TransientPartition::mutate"); + 
DBUG_PRINT("sparrow_transient", ("Mutating transient partition %s.%s.%llu to a persistent partition", + master_->getDatabase().c_str(), master_->getTable().c_str(), static_cast(getSerial()))); + bool mutated = false; + TransientMutationGuard mutationGuard; + const bool newMain = mainPartition->getDataRecords() == 0; + if (errors_ == 0) { + // Create a new persistent partition from the transient one for the indexes. + PersistentPartition* newPartition = new PersistentPartition(PersistentPartition::currentVersion_, master_.get(), getSerial(), mainPartition.get(), + getFilesystem(), getIndexAlterSerial(), getColumnAlterSerial(), + getPeriodNoLock(), getRecords(), 0, getIndexSize(), 0, mainPartition->getDataRecords(), mainPartition->getSkippedColumns()); + // Use an AutoPtr to automatically delete this new partition if an exception is thrown + AutoPtr newPartitionGuard(newPartition); + + // Alterations could occurred while the partition was transient. + bool doAlter = false; + { + WriteGuard guard(master_->getLock()); + doAlter = master_->getIndexAlterSerial() > getIndexAlterSerial(); + + // Update this master file. + mainPartition->addDataSize(getDataSize()); + mainPartition->addDataRecords(getRecords()); + master_->mutatePartition(this, mainPartition.get(), newPartition); + newPartitionGuard.release(); // Release the AutoPtr so that the partition does not get deleted when the object goes out of scope. + + // Write this master file to disk. + master_->toDisk(); + mutated = true; + } + if (doAlter) { + master_->startIndexAlter(false); + } + + // Try to coalesce. + master_->coalesce(); + } else { + if (newMain) { + // This will drop the newly created main partition. + mainPartition->releaseRef(); + } + if (++flush_tries_ == 2) { + WriteGuard guard(master_->getLock()); + master_->mutatePartition(this, 0, 0); + mutated = true; + } + } + return mutated; +} + +// Wait until all on-going flushes are completed. +// STATIC +void TransientPartition::waitForFlushs() { + SPARROW_ENTER("TransientPartition::waitForFlushs"); + Guard flushGuard(TransientPartition::condLock_); + while (flushs_ > 0) { + if (flushCond_.wait(1000, true)) { + break; + } + } +} + +// Navigation methods. Data are ordered only by timestamp, so using another index +// requires an indirector built when the transient partition has been "snapshoted"; +// see QueryInfo::updateIndirector(). + +template class BinarySearch; + +Position TransientPartition::indexFind(Context& context, const uint32_t partition, const KeyValue& key, + const SearchFlag searchFlag) const { + SPARROW_ENTER("TransientPartition::indexFind"); + QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[indexFind] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[indexFind] Empty snapshot for partition %llu (%llu) for table %s.%s. 
Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + assert(rows > 0); + ComparatorTransient comparator(context, *this, snapshot->getIndirector(), key, queryInfo.getCurrentKey().getKey()); + const Position pos(partition, BinarySearch::find(comparator, 0, rows, searchFlag)); + if (pos.isValid()) { + const uint32_t row = pos.getRow(); + return Position(partition, snapshot->getIndirector()[row], row); + } else { + return pos; + } +} + +Position TransientPartition::indexFirst(Context& context, const uint32_t partition) const { + SPARROW_ENTER("TransientPartition::indexFirst"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[indexFirst] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[indexFirst] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + assert(snapshot->getRows() > 0); + return Position(partition, snapshot->getIndirector()[0], 0); +} + +Position TransientPartition::indexLast(Context& context, const uint32_t partition) const { + SPARROW_ENTER("TransientPartition::indexLast"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[indexLast] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[indexLast] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + assert(rows > 0); + return Position(partition, snapshot->getIndirector()[rows - 1], rows - 1); +} + +Position TransientPartition::indexNext(Context& context, const Position& position) const { + SPARROW_ENTER("TransientPartition::indexNext"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[indexNext] No snapshot for partition %llu (%llu) for table %s.%s. 
Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[indexNext] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + assert(rows > 0); + const uint32_t row = position.getIndexHint(); + if (row + 1 == rows) { + return Position(position.getPartition()); + } else { + return Position(position.getPartition(), snapshot->getIndirector()[row + 1], row + 1); + } +} + +Position TransientPartition::indexPrevious(Context& context, const Position& position) const { + SPARROW_ENTER("TransientPartition::indexPrevious"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[indexPrevious] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[indexPrevious] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + assert(snapshot->getRows() > 0); + const uint32_t row = position.getIndexHint(); + if (row == 0) { + return Position(position.getPartition()); + } else { + return Position(position.getPartition(), snapshot->getIndirector()[row - 1], row - 1); + } +} + +Position TransientPartition::moveNext(Context& context, const Position& position) const { + SPARROW_ENTER("TransientPartition::moveNext"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[moveNext] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[moveNext] Empty snapshot for partition %llu (%llu) for table %s.%s. 
Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + assert(rows > 0); + const uint32_t newRow = position.getRow() + 1; + if (newRow < rows) { + return Position(position.getPartition(), newRow); + } else { + return Position(position.getPartition()); + } +} + +Position TransientPartition::movePrevious(Context& context, const Position& position) const { + SPARROW_ENTER("TransientPartition::movePrevious"); + assert(context.getQueryInfo().getSnapshot(this)->getRows() > 0); + if (position.getRow() > 0) { + return Position(position.getPartition(), position.getRow() - 1); + } else { + return Position(position.getPartition()); + } +} + +Position TransientPartition::moveAbsolute(Context& context, const Position& position) const { + SPARROW_ENTER("TransientPartition::moveAbsolute"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[moveAbsolute] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[moveAbsolute] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(position.getPartition()); + } + assert(rows > 0); + if (position.getRow() < rows) { + return position; + } else { + return Position(position.getPartition()); + } +} + +Position TransientPartition::moveFirst(Context& context, const uint32_t partition) const { + SPARROW_ENTER("TransientPartition::moveFirst"); + assert(context.getQueryInfo().getSnapshot(this)->getRows() > 0); + return Position(partition, 0); +} + +Position TransientPartition::moveLast(Context& context, const uint32_t partition) const { + SPARROW_ENTER("TransientPartition::moveLast"); + const QueryInfo& queryInfo = context.getQueryInfo(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[moveLast] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[moveLast] Empty snapshot for partition %llu (%llu) for table %s.%s. 
Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return Position(partition); + } + assert(rows > 0); + return Position(partition, rows - 1); +} + +uint32_t TransientPartition::recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const { + SPARROW_ENTER("TransientPartition::recordsInRange"); + QueryInfo& queryInfo = context.getQueryInfo(); + const TableFields& fields = context.getShare().getMappedFields(); + const PartitionSnapshot* snapshot = queryInfo.getSnapshot(this); + if (snapshot == NULL) { + spw_print_information("[recordsInRange] No snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return 0; + } + const uint32_t rows = snapshot->getRows(); + if (rows == 0) { + spw_print_information("[recordsInRange] Empty snapshot for partition %llu (%llu) for table %s.%s. Partition info: done %u, flushing %u, errors %u", + static_cast(this->getSerial()), static_cast(this->getDataSerial()), master_->getDatabase().c_str(), + master_->getTable().c_str(), this->done_, this->flush_, this->errors_); + return 0; + } + assert(rows > 0); + uint8_t* curKey = queryInfo.getCurrentKey().getKey(); + uint32_t count = 0; + const bool incMin = (minKey != 0 && minKey->flag == HA_READ_KEY_EXACT); + const bool incMax = (maxKey != 0 && maxKey->flag == HA_READ_AFTER_KEY); + Position pos(0); + for (uint32_t row = 0; row < rows; ++row) { + pos.setRow(row); + int cmpMin; + if (minKey == 0) { + cmpMin = 1; + } else { + const KeyValue minKeyValue(minKey); + if (!readKey(context, pos, true, minKeyValue.getMap(), curKey, true)) { + return 0; + } + cmpMin = queryInfo.compareKeys(fields, queryInfo.getCurrentKey(), minKeyValue); + } + int cmpMax; + if (maxKey == 0) { + cmpMax = -1; + } else { + const KeyValue maxKeyValue(maxKey); + if (!readKey(context, pos, true, maxKeyValue.getMap(), curKey, true)) { + return 0; + } + cmpMax = queryInfo.compareKeys(fields, queryInfo.getCurrentKey(), maxKeyValue); + } + if ((cmpMin > 0 || (incMin && cmpMin == 0)) + && (cmpMax < 0 || (incMax && cmpMax == 0))) { + count++; + } + } + return count; +} + +// Reads a given index record and sets MySQL fields. +bool TransientPartition::readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const { + SPARROW_ENTER("TransientPartition::readKey"); + assert(position.isValid()); + uint32_t row = position.getRow(); + const QueryInfo& queryInfo = context.getQueryInfo(); + const TableFields& fields = context.getShare().getMappedFields(); + const KEY& keyInfo = queryInfo.getKeyInfo(); + for (uint32_t i = 0; i < keyInfo.user_defined_key_parts; ++i) { + if ((keyPartMap & (1 << i)) == 0) { + continue; + } + const KEY_PART_INFO& keyPartInfo = keyInfo.key_part[i]; + int fieldId = keyPartInfo.fieldnr - 1; + const FieldBase& field = *fields[fieldId]; + const ColumnAccessor& accessor = *accessors_[fieldId]; + field.readTransient(accessor.getValue(row), accessor.isNull(row), buffer, keyFormat); + if (keyFormat) { + buffer += (field.isNullable() ? 
1 : 0) + field.getLength(true);
+ }
+ }
+ return true;
+}
+
+// Reads a given data record and sets MySQL fields.
+bool TransientPartition::readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const {
+ SPARROW_ENTER("TransientPartition::readData");
+ assert(position.isValid());
+ const uint32_t row = position.getRow();
+ TABLE& table = context.getTable();
+
+ // In case of update, we need to read all fields.
+ const bool forUpdate = !bitmap_is_clear_all(table.write_set);
+ memset(buffer, 0, table.s->null_bytes);
+ const TableFields& fields = context.getShare().getMappedFields();
+ const uint32_t n = fields.length();
+ for (uint32_t i = 0; i < n; ++i) {
+ if (forUpdate || bitmap_is_set(table.read_set, i)) {
+ const ColumnAccessor& accessor = *accessors_[i];
+ fields[i]->readTransient(accessor.getValue(row), accessor.isNull(row), buffer, false);
+ }
+ }
+ return true;
+}
+
+bool TransientPartition::updateData(Context& context, const Position& position, const uint8_t* buffer) {
+ SPARROW_ENTER("TransientPartition::updateData");
+ assert(position.isValid());
+ WriteGuard guard(lock_, false, true);
+ const uint32_t row = position.getRow();
+ const TableFields& fields = context.getShare().getMappedFields();
+ const uint32_t n = fields.length();
+ for (uint32_t i = 0; i < n; ++i) {
+ ColumnAccessor& accessor = *accessors_[i];
+ if (!context.isUpdatableColumn(accessor.getColumnId())) {
+ continue;
+ }
+ const FieldBase& field = *fields[i];
+ uint64_t v;
+ if (field.readMySqlTransient(buffer, v)) {
+ accessor.setNull(row);
+ } else {
+ accessor.writeValue(row, v);
+ if (field.isNullable()) {
+ accessor.resetNull(row);
+ }
+ }
+ }
+ return true;
+}
+
+// STATIC
+// Force flush of some transient partitions to free space in the tuple buffer. Start with partitions bigger than 1MB and,
+// among those, flush the oldest ones first. If that is not enough, halve the size threshold (1MB, then 512KB, 256KB, ...)
+// and iterate, because flushing many small partitions pollutes I/O while freeing little memory.
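+// For example, asked to free 3MB with partitions of 2MB, 800KB and 600KB cached, the first pass
+// (threshold 1MB) flushes the 2MB partition; the threshold then drops to 512KB and the 800KB and
+// 600KB partitions are flushed until the requested amount is reached.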
+uint64_t TransientPartition::flushOldestPartitions(const uint64_t& sizeToFlush) { + if (sizeToFlush == 0) + return 0; + + Guard guard(PartLock_); + uint64_t timestamp = Scheduler::now(); + uint32_t n = 0; + DBUG_PRINT("sparrow_transient", ("Making room in the tuple buffer: size to free %llu, nb partitions %u, nb part being flushed %u (%llu).", + static_cast(sizeToFlush), AllTransPartitions_.entries(), FlushingPartitions_.entries(), static_cast(sizeFlushing_))); + uint64_t sizeFlushed = 0, sizeThreshold = 1024*1024; + while (sizeFlushed < sizeToFlush) { + uint32_t i = 0; + while (sizeFlushed < sizeToFlush && i < AllTransPartitions_.entries()) { + TransientPartition* partition = AllTransPartitions_[i]; + assert(partition != NULL); + const uint64_t partSize = partition->getCachedSize(); + if (partSize < sizeThreshold) { + ++i; + continue; + } + flushingPartitionNoLock(partition); + sizeFlushed += partSize; + ++n; + DBUG_PRINT("sparrow_transient", ("Forcing flush of partition %u/%u, %s.%s.%llu, size %llu.", + i, AllTransPartitions_.entries(), partition->getMaster()->getDatabase().c_str(), partition->getMaster()->getTable().c_str(), + static_cast(partition->getSerial()), static_cast(partSize))); + partition->flushTimestamp_ = timestamp; + Scheduler::addTask(new FlushTask(partition->getMaster(), partition->getSerial()), timestamp); + Atomic::inc64(&SparrowStatus::get().flushForced_); + } + if (AllTransPartitions_.isEmpty()) { + break; + } + sizeThreshold /= 2; + } + DBUG_PRINT("sparrow_transient", ("Triggered flush of %u partitions for a total size of %llu (aimed for %llu). Total size of partitions being flushed %llu", + n, static_cast(sizeFlushed), static_cast(sizeToFlush), static_cast(sizeFlushing_))); + return sizeFlushed; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SizeGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SizeGuard::~SizeGuard() { + SPARROW_ENTER("SizeGuard::~SizeGuard"); + const int64_t partSize = partition_.getSize(); + const int64_t delta = partSize - initialSize_; + volatile uint64_t& tupleBufferSize = SparrowStatus::get().tupleBufferSize_; + { + Guard guard(TransientPartition::condLock_); + DBUG_PRINT("sparrow_transient", ("Tuple buffer size changed from %llu to %llu (delta %lld)", static_cast(tupleBufferSize), + static_cast(tupleBufferSize + delta), static_cast(delta))); + tupleBufferSize += delta; + TransientPartition::updateFlushingSize(&partition_, delta); + + const uint64_t thresholdSize = (sparrow_max_tuple_buffer_size * sparrow_tuple_buffer_threshold) / 100; + if (tupleBufferSize < thresholdSize) { + TransientPartition::insertCond_.signalAll(true); + } + + const uint64_t sizeFlushing = TransientPartition::getSizeFlushing(); + assert(tupleBufferSize >= sizeFlushing); + uint64_t usedTupleBufferSize = tupleBufferSize - sizeFlushing; + DBUG_PRINT("sparrow_transient", ("Used tuple buffer size: %llu", static_cast(usedTupleBufferSize))); + if (usedTupleBufferSize >= thresholdSize) { + TransientPartition::flushOldestPartitions(usedTupleBufferSize - thresholdSize + 1); + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionSnapshot +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class Sort; + +void PartitionSnapshot::updateIndirector(const uint32_t index) { + assert(rows_ > 0); + if (index == DATA_FILE) 
{ + delete indirector_; + indirector_ = 0; + } else { + if (indirector_ == 0) { + indirector_ = new ConcurrentIndirector(); + } else { + indirector_->clear(); + } + + // Initializes indirector if we use an index other than the timestamp index. + // This indirector is used for searching quickly in the transient partition. + for (uint32_t row = 0; row < rows_; ++row) { + indirector_->append(row); + } + const RowComparator comparator(*partition_.get(), partition_->getColumnIds(index)); + Sort::quickSort(*indirector_, comparator, 0, rows_); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ComparatorTransient +////////////////////////////////////////////////////////////////////////////////////////////////////// + +ComparatorTransient::ComparatorTransient(Context& context, const TransientPartition& partition, const ConcurrentIndirector& indirector, const KeyValue& key, uint8_t* buffer) + : context_(context), partition_(partition), queryInfo_(context.getQueryInfo()), fields_(context.getShare().getMappedFields()), key_(key), + indirector_(indirector), tempKey_(buffer, key.getMap()) { +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RecordWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +RecordWriter::RecordWriter(const ColumnAccessors& accessors, const ColumnIds* columnIds) { + if (columnIds == 0) { + // Data file: all columns. + accessors_ = accessors; + } else { + // Index file or data files with skipped columns. + accessors_.resize(columnIds->length()); + for (uint32_t i = 0; i < columnIds->length(); ++i) { + uint32_t id = (*columnIds)[i]; + uint32_t j = 0; + for (; j < accessors.entries(); ++j) { + if (accessors[j]->getColumnId() == id) + break; + } + assert(j < accessors.entries()); + if (j == accessors.entries()) { + throw SparrowException::create(false, "Can't find column Id %u in valid column list. Failed to build RecordWriter.", id); + } + accessors_.append(accessors[j]); + } + } + bits_ = 0; + size_ = 0; + for (uint32_t i = 0; i < accessors_.length(); ++i) { + const Column& column = accessors_[i]->getColumn(); + size_ += column.getDataSize(); + bits_ += column.getBits(); + } + size_ += (bits_ + 7) / 8; +} + +void RecordWriter::write(ByteBuffer& buffer, const uint32_t row) _THROW_(SparrowException) { + // Write bits first. + const uint8_t bitLength = (bits_ + 7) / 8; + uint8_t bits[SPARROW_MAX_BIT_SIZE]; + if (bitLength > 0) { + memset(bits, 0, sizeof(bits)); + uint32_t n = 0; + for (uint32_t i = 0; i < accessors_.length(); ++i) { + const uint8_t bitValues = accessors_[i]->getBitValues(row); + const uint32_t nbits = accessors_[i]->getColumn().getBits(); + for (uint32_t j = 0; j < nbits; ++j) { + if (bitValues & (1 << j)) { + bits[n / 8] |= (1 << (n % 8)); + } + n++; + } + } + buffer << ByteBuffer(bits, bitLength); + } + + // Write column values. + for (uint32_t i = 0; i < accessors_.length(); ++i) { + accessors_[i]->write(buffer, row); + } +} + +} + diff --git a/storage/sparrow/engine/transient.h b/storage/sparrow/engine/transient.h new file mode 100644 index 000000000000..f007b80c2719 --- /dev/null +++ b/storage/sparrow/engine/transient.h @@ -0,0 +1,1150 @@ +/* + Transient partition. 
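+ A transient partition buffers freshly inserted rows in memory, one ColumnAccessor per column,
+ until it is flushed to disk and mutated into a persistent partition.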
+*/ + +#ifndef _engine_transient_h_ +#define _engine_transient_h_ + +#include "master.h" +#include "cache.h" +#include "sort.h" +#include "binbuffer.h" + +#include "../engine/log.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DataReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// To unmarshall inserted data. +class DataReader { +private: + + ByteBuffer& buffer_; + BinBuffer& binBuffer_; + +public: + + DataReader(ByteBuffer& buffer, BinBuffer& binBuffer) + : buffer_(buffer), binBuffer_(binBuffer) { + } + + DataReader& operator >> (int8_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (uint8_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (int16_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (uint16_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (int32_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (uint32_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (int64_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (uint64_t& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (double& v) { + buffer_ >> v; + return *this; + } + + DataReader& operator >> (BinString*& string) { + uint32_t length; + buffer_ >> length; + const uint64_t pos = buffer_.position(); + if (pos + length <= buffer_.limit()) { + // The string is in the buffer. + string = binBuffer_.insert(buffer_.getCurrentData(), length); + buffer_.position(pos + length); + } else { + // The string crosses the buffer boundary; use a temporary buffer. + ByteBuffer stringBuffer(static_cast(IOContext::getTempBuffer1(length)), length); + buffer_ >> stringBuffer; + string = binBuffer_.insert(stringBuffer.getData(), length); + } + return *this; + } + + bool end() const { + return buffer_.end(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ColumnComparator { +public: + + // Blobs and strings. + static int compare(const BinString& v1, const BinString& v2, CHARSET_INFO* cs) { + return cs->coll->strnncollsp(cs, v1.getData(), v1.getLength(), v2.getData(), v2.getLength()); + } + + // Bytes. + static int compare(const int8_t v1, const int8_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + static int compare(const uint8_t v1, const uint8_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + // Shorts. + static int compare(const int16_t v1, const int16_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + static int compare(const uint16_t v1, const uint16_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + // Integers. + static int compare(const int32_t v1, const int32_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + static int compare(const uint32_t v1, const uint32_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + // Longs. + static int compare(const int64_t v1, const int64_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + + // Timestamps. + static int compare(const uint64_t v1, const uint64_t v2) { + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); + } + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + + // Doubles. 
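+ // Ordinary values are ordered with < and >; values that compare neither way (NaN, or
+ // -0.0 versus +0.0) fall back to a bitwise comparison so the ordering remains total.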
+ static int compare(const double v1, const double v2) { + if (v1 < v2) { + return -1; + } else if (v1 > v2) { + return 1; + } + // Handle properly special cases. + const uint64_t u1 = *(const uint64_t*)&v1; + const uint64_t u2 = *(const uint64_t*)&v2; + return u1 == u2 ? 0 : (u1 < u2 ? -1 : 1); + } + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnAccessor +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* Stores column values. ColumnAccessor is the base class defining the interface. + It has a default implementation for some basic methods (NULL management for example). +*/ + +class ColumnAccessor { +private: + + const uint32_t columnId_; + const Column column_; + SYSbitVector > nulls_; + +protected: + + virtual int compareRows(const uint32_t row1, const uint32_t row2) const = 0; + virtual void clearData() = 0; + virtual void shrinkData(const uint32_t length) = 0; + virtual int64_t getSize() const = 0; + +public: + + ColumnAccessor(const uint32_t columnId, const Column& column) : columnId_(columnId), column_(column) { + } + virtual ~ColumnAccessor() { + } + uint32_t getColumnId() const { + return columnId_; + } + const Column& getColumn() const { + return column_; + } + bool isNullable() const { + return column_.isFlagSet(COL_NULLABLE); + } + bool isNull(const uint32_t row) const { + return isNullable() ? (row < nulls_.length() ? nulls_[row] : false) : false; + } + bool areAllNulls() const { + return (nulls_.length() == length() && nulls_.areAll(true)); + } + void setNull(const uint32_t row) { + if (isNullable()) { + nulls_.setBit(row); + } + } + void insertNull(const uint32_t row) { + nulls_.setBit(row); + insertDummy(); + } + void resetNull(const uint32_t row) { + nulls_.clearBit(row); + } + void insertNull() { + insertNull(length()); + } + virtual void insertDummy(bool real=false) = 0; + virtual void insertValue(DataReader& reader) = 0; + virtual int64_t insertAutoInc(const int64_t autoInc) = 0; + int compare(const uint32_t row1, const uint32_t row2) const { + if (isNull(row1)) { // NULLs are the smallest values. + return isNull(row2) ? 0 : -1; + } else if (isNull(row2)) { + return 1; + } + return compareRows(row1, row2); + } + virtual uint32_t length() const = 0; + void shrink(const uint32_t length) { + nulls_.shrink(length); + shrinkData(length); + } + void clear() { + nulls_.clear(); + clearData(); + } + virtual uint8_t getBitValues(const uint32_t row) const = 0; + virtual void write(ByteBuffer& buffer, const uint32_t row) const _THROW_(SparrowException) = 0; + virtual uint64_t getValue(const uint32_t row) const = 0; + virtual void writeValue(const uint32_t row, const uint64_t data) = 0; + int64_t getTotalSize() const { + return getSize() + nulls_.getSize(); + } +}; + +typedef SYSpVector ColumnAccessors; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnAccessorSimple +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* Generic implementation of ColumnAccessor for basic types. 
Values are stored in a SYSlvector +*/ + +template class ColumnAccessorSimple : public ColumnAccessor, public SYSlvector { +protected: + + void insertDummy(bool real=false) override { + this->append((T)0); + } + + int compareRows(const uint32_t row1, const uint32_t row2) const override { + return ColumnComparator::compare((*this)[row1], (*this)[row2]); + } + + void clearData() override { + SYSlvector::clear(); + } + + void shrinkData(const uint32_t length) override { + SYSlvector::shrink(length); + } + + int64_t getSize() const override { + return SYSlvector::getSize(); + } + +public: + + ColumnAccessorSimple(const uint32_t columnId, const Column& column) : ColumnAccessor(columnId, column) { + } + + uint8_t getBitValues(const uint32_t row) const override { + return isNull(row) ? 1 : 0; + } + + uint32_t length() const override { + return SYSlvector::length(); + } + + void insertValue(DataReader& reader) override { + T t; + reader >> t; + this->append(t); + } + + int64_t insertAutoInc(const int64_t autoInc) override { + return autoInc; + } + + void write(ByteBuffer& buffer, const uint32_t row) const override _THROW_(SparrowException) { + if (isNull(row)) { + buffer << (T)0; + } else { + buffer << (*this)[row]; + } + } + + uint64_t getValue(const uint32_t row) const override { + return static_cast((*this)[row]); + } + + void writeValue(const uint32_t row, const uint64_t data) override { + (*this)[row] = static_cast(data); + } +}; + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +// Specialization for doubles. +template<> inline uint64_t ColumnAccessorSimple::getValue(const uint32_t row) const { + return *(const uint64_t*)&(*this)[row]; +} + +template<> inline void ColumnAccessorSimple::writeValue(const uint32_t row, const uint64_t data) { + (*this)[row] = *(const double*)&data; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +// Specialization for int64_t. +template<> inline int64_t ColumnAccessorSimple::insertAutoInc(const int64_t autoInc) { + append(autoInc); + return autoInc + 1; +} + +// Specialization for uint64_t. +template<> inline int64_t ColumnAccessorSimple::insertAutoInc(const int64_t autoInc) { + append(static_cast(autoInc)); + return autoInc + 1; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnAccessorBin +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* Specialized implementation of the ColumnAccessor interface for Strings. + Works for small strings (length <= 16 bytes) and long strings. + . BinBuffer& buffer_ contains the actual strings in one BIG buffer. The buffer is a reference to TransientPartition::binBuffer_. + . BinStrings is a vector having as many items as rows in the partition this ColumnAccessorBin is used by. + Each item in the BinStrings points to a string in buffer_. 
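+   Identical strings are stored only once: BinBuffer::insert() returns a reference to the existing
+   copy when the same bytes were already inserted (see insertValue() below).
+   On disk, small strings (16 bytes or fewer) are written inline in the record, padded to 16 bytes,
+   while longer strings are written as an offset and a length into the partition string buffer
+   (see write() below).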
+*/ + +class ColumnAccessorBin : public ColumnAccessor, public BinStrings { +private: + + BinBuffer& buffer_; + CHARSET_INFO* cs_; + +protected: + + void insertDummy(bool real=false) override { + static BinString dummy; + if ( real ) { + append(&dummy); + } else { + append(0); + } + } + + int compareRows(const uint32_t row1, const uint32_t row2) const override { + return ColumnComparator::compare(*(*this)[row1], *(*this)[row2], cs_); + } + + void clearData() override { + BinStrings::clear(); + } + + void shrinkData(const uint32_t length) override { + BinStrings::shrink(length); + } + + int64_t getSize() const override { + return BinStrings::getSize(); + } + +public: + + ColumnAccessorBin(const uint32_t columnId, const Column& column, BinBuffer& buffer) + : ColumnAccessor(columnId, column), buffer_(buffer) { + CHARSET_INFO* cs = get_charset_by_name(column.getCharset().c_str(), MYF(MY_WME)); + if (cs == 0) { + if (column.getType() == COL_STRING) { + cs = &my_charset_utf8mb4_bin; + } else { + cs = &my_charset_bin; + } + } + cs_ = cs; + } + + uint8_t getBitValues(const uint32_t row) const override { + if (isNull(row)) { + return 1; + } else { + const BinString& string = *(*this)[row]; + if (isNullable()) { + return string.isSmall() ? (string.getLength() << 1) : 0; + } else { + return string.isSmall() ? string.getLength() : 0; + } + } + } + + // Insert a new string in buffer_ (if it does not exist already) and add a reference to it in this column's current row. + void insertValue(const uint32_t row, const char* s, const uint32_t length) { + resetNull(row); + (*this)[row] = buffer_.insert(reinterpret_cast(s), length); + } + + int64_t insertAutoInc(const int64_t autoInc) override { + return autoInc; + } + + uint32_t length() const override { + return BinStrings::length(); + } + + void insertValue(DataReader& reader) override { + BinString* string; + reader >> string; + append(string); + } + + // Short string (length <= 16 bytes) are padded to 16 bytes with 0. + // Long strings (length > 16 bytes) are stored in the format: + // . length on 1 or 2 bytes depending on string length + // . 
string content + void write(ByteBuffer& buffer, const uint32_t row) const override _THROW_(SparrowException) { + if (isNull(row)) { + buffer << static_cast(0) << static_cast(0); + } else { + const BinString& string = *(*this)[row]; + const uint32_t length = string.getLength(); + if (string.isSmall()) { + buffer << ByteBuffer(string.getData(), length); + const uint32_t remainder = 16 - length; + for (uint32_t i = 0; i < remainder; ++i) { + buffer << static_cast(0); + } + } else { + buffer << string.getOffset() << static_cast(length); + } + } + } + + uint64_t getValue(const uint32_t row) const override { + return (uint64_t)(*this)[row]; + } + + void writeValue(const uint32_t row, const uint64_t data) override { + assert(0); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RecordWriter +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class RecordWriter { +private: + + ColumnAccessors accessors_; + uint32_t bits_; + uint32_t size_; + +public: + + RecordWriter(const ColumnAccessors& accessors, const ColumnIds* columnIds); + + uint32_t getSize() const { + return size_; + } + + void write(ByteBuffer& buffer, const uint32_t row) _THROW_(SparrowException); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ConcurrentIndirector +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// For partition snapshots. +typedef SYSlvector ConcurrentIndirector; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Indirector +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSxvector Indirector; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TransientPartition +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// The transient partition is a special partition. +// Incoming data are first written to this partition. When the partition is full or too old, +// indexes are generated and everything is written to disk. +// Also, the transient partition may be browsed when a query is being processed by the engine. +class PersistentPartition; +class FlushTask; +class DnsTask; +class PartitionSnapshot; +class SizeGuard; +class RowComparator; +class TransientMutationGuard; +typedef SYSvector IndexStringFlags; +class TransientPartition : public Partition { + friend class SizeGuard; + friend class RowComparator; + friend class TransientMutationGuard; + +private: + + // Parent master file. + MasterGuard master_; + + // DNS configuration. + DnsConfigurationGuard dnsConfiguration_; + + // Column accessors. + ColumnAccessors accessors_; + + // Columns that contain no valid data (all values are NULL). + ColumnIds emptyColumnsIds_; + + // Timestamp accessor. + ColumnAccessorSimple* timestampAccessor_; + + // Min/max timestamps. + uint64_t minTimestamp_; + uint64_t maxTimestamp_; + + // DNS identifier accessor, may be null. + ColumnAccessor* dnsIdAccessor_; + + // Buffer where strings and blobs are stored. This BinBuffer stores all strings and blobs for this partition, whatever column they belong to. + // That includes IP addresses and IP lookups. + BinBuffer binBuffer_; + + // Column ids for all indexes. 
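+  // indexIds_ and columnIds_ are parallel arrays: indexIds_[i] is the identifier of the i-th
+  // index and columnIds_[i] holds the ids of the columns that index is built on (see
+  // getColumnIds() below).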
+ IndexIds indexIds_; + ColumnIdsArray columnIds_; + bool hasString_; + IndexStringFlags indexStringFlags_; + + // Lock to protect data in transient columns. + RWLock lock_; + + // Creation timestamp. + const uint64_t timestamp_; + + // Records ready to be queried. + uint32_t records_; + + // Counter to know when this partition is flushed to disk. + volatile uint32_t jobCounter_; + + // Counter of errors while flushing. If an error occurs, partition is lost. + volatile uint32_t errors_; + + volatile uint32_t flush_tries_; + + // Flag indicating if this partition is done (full or being flushed). + bool done_; + + // Number of pending DNS queries for this partition. + volatile uint32_t dnsPending_; + + // Flag indicating if flushing has started. + bool flush_; + + // File sizes. + uint64_t dataSize_; + uint64_t indexSize_; + + // String info. + uint64_t stringOffset_; + uint64_t stringSize_; + + // To resolve IP addresses. + SYSvector dnsIpAccessors_; + SYSvector dnsLookupAccessors_; + + // Flush timestamp. + uint64_t flushTimestamp_; + + // Partition size + uint64_t size_; + + static TimePeriod voidPeriod_; + + // Condition to know if insertion is possible. + static Lock condLock_; + static Cond insertCond_; + + // To wait until flushs are completed. + static volatile uint32_t flushs_; // Counts the number of flush task pending or currently executing. + static Cond flushCond_; // Signaled when flushs_ reaches 0 + + static Lock PartLock_; + static SYSvector AllTransPartitions_; // List of all transient partitions, first one is oldest, last is newest + static SYSvector FlushingPartitions_; + static uint64_t sizeFlushing_; + + static void addPartition(TransientPartition* partition) { + Guard guard(PartLock_); + AllTransPartitions_.append(partition); + } + + static void updateFlushingSize(TransientPartition* partition, const int64_t& delta) { + Guard guard(PartLock_); + uint64_t size = partition->getCachedSize(); + assert(delta > 0 || static_cast(size) >= delta); + size = static_cast(static_cast(size) + delta); + DBUG_PRINT("sparrow_transient", ("Update %s.%s.%llu size to %lu", partition->getMaster()->getDatabase().c_str(), partition->getMaster()->getTable().c_str(), + static_cast(partition->getSerial()), size)); + partition->setCachedSize(size); + if (FlushingPartitions_.contains(partition)) { + sizeFlushing_ += delta; + DBUG_PRINT("sparrow_transient", ("Updating tot size to %llu (delta %lld).", static_cast(sizeFlushing_), static_cast(delta))); + } + } + + static void flushingPartitionNoLock(TransientPartition* partition) { + if (AllTransPartitions_.remove(partition)) { + const uint64_t size = partition->getCachedSize(); + assert(FlushingPartitions_.contains(partition) == false); + FlushingPartitions_.append(partition); + sizeFlushing_ += size; + DBUG_PRINT("sparrow_transient", ("Flushing partition %s.%s.%llu, size %llu, tot size %llu, nb transient %u, nb flushing %u.", + partition->getMaster()->getDatabase().c_str(), partition->getMaster()->getTable().c_str(), static_cast(partition->getSerial()), + static_cast(size), static_cast(sizeFlushing_), AllTransPartitions_.entries(), FlushingPartitions_.entries())); + + } else { + assert(FlushingPartitions_.contains(partition) == true); + assert(sizeFlushing_ >= static_cast(partition->getCachedSize())); + DBUG_PRINT("sparrow_transient", ("Flushing forced partition %s.%s.%llu, size %llu, tot size %llu, nb transient %u, nb flushing %u.", + partition->getMaster()->getDatabase().c_str(), partition->getMaster()->getTable().c_str(), 
static_cast(partition->getSerial()), + static_cast(partition->getCachedSize()), static_cast(sizeFlushing_), AllTransPartitions_.entries(), FlushingPartitions_.entries())); + } + } + + static void flushingPartition(TransientPartition* partition) { + Guard guard(PartLock_); + flushingPartitionNoLock(partition); + } + + static void flushedPartition(TransientPartition* partition) { + Guard guard(PartLock_); + const uint64_t size = partition->getCachedSize(); + if (partition->flush_) { + assert(AllTransPartitions_.contains(partition) == false); + [[maybe_unused]] bool removed = FlushingPartitions_.remove(partition); + assert(removed == true); + } else { + AllTransPartitions_.remove(partition); + FlushingPartitions_.remove(partition); + } + sizeFlushing_ -= (sizeFlushing_ > size ? size : sizeFlushing_); + DBUG_PRINT("sparrow_transient", ("Flushed partition %s.%s.%llu, size %llu, tot size %llu, nb transient %u, nb flushing %u.", + partition->getMaster()->getDatabase().c_str(), partition->getMaster()->getTable().c_str(), static_cast(partition->getSerial()), + static_cast(partition->getCachedSize()), static_cast(sizeFlushing_), AllTransPartitions_.entries(), FlushingPartitions_.entries())); + } + + static uint64_t flushOldestPartitions(const uint64_t& sizeToFlush); + +private: + + static Str getName(Master* master, const uint64_t serial, const char* name); + + void initialize(); + + int compare(const ColumnPos& columnPos, const int row1, const int row2, const bool sortByRow) const { + for (uint32_t i = 0; i < columnPos.length(); ++i) { + const int cmp = accessors_[columnPos[i]]->compare(row1, row2); + if (cmp != 0) { + return cmp; + } + } + if (sortByRow) { + // Sort by row: in case of identical values, we get a better locality. + return row1 > row2 ? 1 : (row1 < row2 ? -1 : 0); + } else { + return 0; + } + } + + uint64_t dnsLookup(const uint32_t start, const bool lastPass); + + void scheduleFlush(const uint64_t dnsTimestamp); + + void doFlush(PersistentPartitionGuard mainPartition); + + void refreshEmptyColumns(); + + // Copy and assignment are forbidden. + TransientPartition(const TransientPartition& right); + TransientPartition& operator = (const TransientPartition& right); + +public: + + TransientPartition(Master* master, const uint64_t serial); + + ~TransientPartition(); + + Master* getMaster() { + return master_.get(); + } + + void clear(); + + void detach() override { + master_ = 0; + } + + const ColumnAccessors& getAccessors() const { + return accessors_; + } + + const ColumnIds& getColumnIds(const uint32_t index) const { + uint32_t id = UINT_MAX; + for (uint32_t i = 0; i < indexIds_.length(); ++i) { + if (indexIds_[i] == index) { + id = i; + break; + } + } + return columnIds_[id]; + } + + void getColumnPos(ColumnPos& pos, const ColumnIds& ids) const { + pos.clear(); + pos.resize(ids.entries()); + for (uint32_t i = 0; i < ids.length(); ++i) { + uint32_t id = ids[i]; + uint32_t j = 0; + for (; j < accessors_.entries(); ++j) { + if (accessors_[j]->getColumnId() == id) + break; + } + assert(j < accessors_.entries()); + if (j == accessors_.entries()) { + throw SparrowException::create(false, "Can't find column id %u in valid column list for partition %s.%s.%llu.", + id, master_.get()->getDatabase().c_str(), master_.get()->getTable().c_str(), static_cast(getSerial())); + } + pos.append(j); + } + } + + PartitionSnapshot* snapshot(); + + // Attributes. 
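+  // Note on locking: getRecordsSafe() and getPeriod() take the partition read lock, while
+  // getRecords() and getPeriodNoLock() do not and presumably expect the caller either to hold
+  // lock_ already or to tolerate a racy read.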
+ + uint32_t getRecords() const override { + return records_; + } + uint32_t getRecordsSafe() const { + ReadGuard guard(const_cast(lock_)); + return records_; + } + TimePeriod getPeriodNoLock() const { + if (records_ == 0) { + return voidPeriod_; + } else { + return TimePeriod(minTimestamp_, maxTimestamp_); + } + } + TimePeriod getPeriod() const override { + ReadGuard guard(const_cast(lock_)); + return getPeriodNoLock(); + } + uint64_t getDataSize() const override { + return dataSize_; + } + uint64_t getIndexSize() const override { + return indexSize_; + } + + bool isTransient() const override { + return true; + } + + bool isReady() const override { + return master_->getIndexAlterSerial() == getIndexAlterSerial(); + } + + bool isIndexAlterable() const override { + return false; + } + + const ColumnIds& getEmptyColumns() const { + return emptyColumnsIds_; + } + + void setEmptyColumns(const ColumnIds& emptyColumnsIds) { + emptyColumnsIds_ = emptyColumnsIds; + } + + // Data access. + + Position indexFind(Context& context, const uint32_t partition, const KeyValue& key, const SearchFlag searchFlag) const override; + + Position indexFirst(Context& context, const uint32_t partition) const override; + + Position indexLast(Context& context, const uint32_t partition) const override; + + Position indexNext(Context& context, const Position& position) const override; + + Position indexPrevious(Context& context, const Position& position) const override; + + Position moveNext(Context& context, const Position& position) const override; + + Position movePrevious(Context& context, const Position& position) const override; + + Position moveAbsolute(Context& context, const Position& position) const override; + + Position moveFirst(Context& context, const uint32_t partition) const override; + + Position moveLast(Context& context, const uint32_t partition) const override; + + uint32_t recordsInRange(Context& context, const uint32_t partition, const key_range* minKey, const key_range* maxKey) const override; + + bool readKey(Context& context, const Position& position, const bool forward, + const key_part_map keyPartMap, uint8_t* buffer, const bool keyFormat) const override; + + bool readData(Context& context, const Position& position, uint8_t* buffer, const BlockCacheHint& hint) const override; + + bool updateData(Context& context, const Position& position, const uint8_t* buffer) override; + + // Insertion and flushing. 
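+  // Rough lifecycle, as reflected by the members above: rows are appended through insert();
+  // when the partition is full or too old, flush()/forceFlush() schedule the tasks that build
+  // the indexes and write everything to a persistent partition. jobCounter_ tracks the
+  // outstanding flush jobs, errors_ counts failures (a failed flush loses the partition) and
+  // endFlush() finalizes the operation.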
+ + static bool waitForRoom(volatile bool& aborting); + + bool insert(ByteBuffer& buffer, const uint32_t rows, uint64_t& timestamp) _THROW_(SparrowException); + + bool insert(ByteBuffer& buffer, const uint32_t rows, const Names& columns, const ColumnIds& colIds, uint64_t& timestamp) _THROW_(SparrowException); + + void dnsUpdate(); + + bool isDone() { + ReadGuard guard(lock_); + return done_; + } + + void flushStrings(PersistentPartitionGuard mainPartition) _THROW_(SparrowException); + + void compute(PersistentPartitionGuard mainPartition, const uint32_t id); + + void write(PersistentPartitionGuard mainPartition, const uint32_t indexId, const Indirector* indirector) _THROW_(SparrowException); + + void updateDnsConfiguration(DnsConfiguration* dnsConfiguration); + + bool flush(const uint64_t timestamp, bool master_lock_taken=false, bool force=false); + + bool forceFlush(bool master_lock_taken=false); + + uint32_t getNbFlushTries() const { return flush_tries_; } + + void incJobCounter() { + Atomic::inc32(&jobCounter_); + } + + bool decJobCounter() { + return (Atomic::dec32(&jobCounter_) == 0); + } + + void setJobCounter(const uint32_t jobCounter) { + jobCounter_ = jobCounter; + } + + uint32_t getJobCounter() { + return jobCounter_; + } + + void endFlush(PersistentPartitionGuard mainPartition); + + void error() { + Atomic::inc32(&errors_); + } + + void resetFlush() { + flush_ = false; + Atomic::add32(&errors_, -static_cast(errors_)); + } + + bool mutate(PersistentPartitionGuard mainPartition) _THROW_(SparrowException); + + uint64_t getCachedSize() const { + return size_; + } + + void setCachedSize(const uint64_t size) { + size_ = size; + } + + int64_t getSize() const { + int64_t size = binBuffer_.getSize(); + for (uint32_t i = 0; i < accessors_.length(); ++i) { + size += accessors_[i]->getTotalSize(); + } + return size; + } + + static void waitForFlushs(); + + static uint32_t getNbFlushs() { + Guard flushGuard(TransientPartition::condLock_); + return flushs_; + } + + static uint64_t getSizeFlushing() { return sizeFlushing_; } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TransientMutationGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TransientMutationGuard { +public: + + TransientMutationGuard() { + } + + ~TransientMutationGuard() { + Guard flushGuard(TransientPartition::condLock_); + if (--TransientPartition::flushs_ == 0) { + TransientPartition::flushCond_.signalAll(true); + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RowComparator +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class RowComparator { +private: + + const TransientPartition& partition_; + ColumnPos columnPos_; + +public: + + RowComparator(const TransientPartition& partition, const ColumnIds& columnIds) + : partition_(partition) { + partition_.getColumnPos(columnPos_, columnIds); + } + + int compare(const uint32_t row1, const uint32_t row2) const { + return partition_.compare(columnPos_, row1, row2, true); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TransientTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TransientTask : public MasterTask { +private: + + // Keep the partition serial instead of a direct reference to 
the transient partition, + // because such a reference will prevent it from being deleted until the last task is processed. + const uint64_t serial_; + +private: + + virtual void process(TransientPartition& partition, const uint64_t timestamp) _THROW_(SparrowException) = 0; + +public: + + TransientTask(Master* master, const uint64_t serial) : MasterTask(Worker::getQueue(), master), serial_(serial) { + } + + TransientTask(Master* master, const uint64_t serial, Queue& queue) : MasterTask(queue, master), serial_(serial) { + } + + virtual ~TransientTask() { + } + + virtual bool operator == (const TransientTask& right) const { + return this == &right; + } + + virtual bool operator == (const Task& right) const override { + return false; + } + + uint64_t getPeriod() const override { + return 0; + } + + void run(const uint64_t timestamp) override _THROW_(SparrowException) { + PartitionGuard partition = get()->getPartition(serial_); + if (partition.get() != 0 && partition->isTransient()) { + process(static_cast(*partition), timestamp); + } + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DnsTask +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DnsTask : public TransientTask { +private: + + void process(TransientPartition& partition, const uint64_t timestamp) override _THROW_(SparrowException) { + partition.dnsUpdate(); + } + +public: + + DnsTask(Master* master, const uint64_t serial) : TransientTask(master, serial) { + Atomic::inc32(&SparrowStatus::get().tasksPendingDnsTasks_); + } + + ~DnsTask() { + Atomic::dec32(&SparrowStatus::get().tasksPendingDnsTasks_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartitionSnapshot +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartitionSnapshot { +private: + + TransientPartitionGuard partition_; + + const uint32_t rows_; + + ConcurrentIndirector* indirector_; + +public: + + PartitionSnapshot() : rows_(0), indirector_(0) { + } + + PartitionSnapshot(TransientPartition* partition) + : partition_(partition), rows_(0), indirector_(0) { + } + + PartitionSnapshot(TransientPartition* partition, const uint32_t rows) + : partition_(partition), rows_(rows), indirector_(0) { + assert(rows_ > 0); + } + + ~PartitionSnapshot() { + delete indirector_; + } + + uint32_t getRows() const { + return rows_; + } + + const ConcurrentIndirector& getIndirector() const { + return *indirector_; + } + + void updateIndirector(const uint32_t index); + + bool operator == (const PartitionSnapshot& right) const { + return partition_ == right.partition_; + } + + uint32_t hash() const { + return partition_->hash(); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SizeGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Updates sparrow_tuple_buffer_size. 
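+// RAII helper: the constructor records the partition size and the destructor (defined in the
+// .cc file) presumably charges the difference against the global tuple-buffer accounting.
+// Rough idea, illustrative only:
+//
+//   SizeGuard::~SizeGuard() {
+//     const int64_t delta = partition_.getSize() - initialSize_;
+//     // apply 'delta' to the sparrow_tuple_buffer_size bookkeeping
+//   }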
+class SizeGuard { +private: + + TransientPartition& partition_; + const int64_t initialSize_; + +public: + + SizeGuard(TransientPartition& partition) : partition_(partition), initialSize_(partition_.getSize()) { + } + + ~SizeGuard(); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ComparatorTransient +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ComparatorTransient { +private: + + Context& context_; + const TransientPartition& partition_; + const QueryInfo& queryInfo_; + const TableFields& fields_; + const KeyValue& key_; + const ConcurrentIndirector& indirector_; + KeyValue tempKey_; + +public: + + ComparatorTransient(Context& context, const TransientPartition& partition, const ConcurrentIndirector& indirector, const KeyValue& key, uint8_t* buffer); + + int compareTo(const uint32_t row) _THROW_(SparrowException) { + if (!partition_.readKey(context_, Position(0, indirector_[row]), true, key_.getMap(), tempKey_.getKey(), true)) { + throw SparrowException::create(false, "MySQL error, data too large for column size?"); + } + return queryInfo_.compareKeys(fields_, tempKey_, key_); + } +}; + +} + +#endif /* #ifndef _engine_transient_h_ */ diff --git a/storage/sparrow/engine/treeorder.cc b/storage/sparrow/engine/treeorder.cc new file mode 100644 index 000000000000..cc1a58513335 --- /dev/null +++ b/storage/sparrow/engine/treeorder.cc @@ -0,0 +1,94 @@ +/* + Tree order. +*/ + +#include "types.h" +#include "treeorder.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TreeOrder +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// A TreeOrder is an array of integers designed for writing sequentially a sorted list as a perfect +// binary tree to a file, thus enabling fast search with good locality when reading from the file. +// See http://en.wikipedia.org/wiki/Binary_tree and http://en.wikipedia.org/wiki/Binary_tree#Methods_for_storing_binary_trees. +// Considering a sorted list of size 2^n - 1, for i = 0..2^n - 2, TreeOrder[i] gives the index in the +// list for node i in a perfect binary tree (i is the node index, in writing order). +// Example for depth = 3 (7 elements): +// - Sorted list: +// +// [0][1][2][3][4][5][6] +// +// - Perfect binary tree containing all list elements: +// +// __3__ +// | | +// _1_ _5_ +// | | | | +// 0 2 4 6 +// +// - Related TreeOrder array: +// +// [3][1][5][0][2][4][6] + +// Following the tree order indirection, we can write a sorted list as a perfect binary tree +// directly to a file. + +// To avoid computing a TreeOrder array every time we need it, cache TreeOrder objects by depth. +SYSpVector TreeOrder::orders_; +Lock TreeOrder::lock_(true, "TreeOrder::lock_"); + +// STATIC +uint32_t TreeOrder::depth(const uint32_t cardinality) { + // Find depth of the nearest perfect tree. + uint32_t depth = 1; + while (cardinality > (static_cast(1) << depth) - 1) { + depth++; + } + return depth; +} + +// Creates a new tree order for the given depth. +TreeOrder::TreeOrder(const uint32_t depth) { + const uint32_t size = (static_cast(1) << depth) - 1; + resize(size); + forceLength(size); + + // Compute indirection using an iterative method. + uint32_t node = 0; + setListIndex(node++, size >> 1); // Starting node: middle of the list. + uint32_t width = (1 << (depth - 1)); // Starting width. 
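+  // Walk-through for depth = 3 (matching the example at the top of this file): node 0 receives
+  // list index 3 (the middle), then with width = 2 nodes 1 and 2 receive 1 and 5, and with
+  // width = 1 nodes 3..6 receive 0, 2, 4 and 6 -- i.e. the [3][1][5][0][2][4][6] array above.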
+ for (uint32_t i = 1; i < depth; ++i) { + width >>= 1; // Width decreases as we go down the tree. + for (uint32_t j = 0; j < (static_cast(1) << i); j += 2) { + const uint32_t parent = (node >> 1); // Parent node id. + setListIndex(node++, getListIndex(parent, size) - width); // Left child. + setListIndex(node++, getListIndex(parent, size) + width); // Right child. + } + } +} + +// Gets or creates a tree order for the given cardinality. +// STATIC +const TreeOrder& TreeOrder::get(const uint32_t cardinality) { + const uint32_t d = depth(cardinality); + Guard guard(lock_); + const uint32_t length = orders_.length(); + if (d >= length) { + orders_.resize(d + 1); + orders_.forceLength(d + 1); + for (uint32_t i = length; i <= d; ++i) { + orders_[i] = 0; + } + } + if (orders_[d] == 0) { + orders_[d] = new TreeOrder(d); + } + return *orders_[d]; +} + +} + + diff --git a/storage/sparrow/engine/treeorder.h b/storage/sparrow/engine/treeorder.h new file mode 100644 index 000000000000..ccd226136807 --- /dev/null +++ b/storage/sparrow/engine/treeorder.h @@ -0,0 +1,114 @@ +/* + Tree order. +*/ + +#ifndef _engine_treeorder_h_ +#define _engine_treeorder_h_ + +#include "vec.h" +#include "lock.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TreeNode +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TreeNode { +private: + + uint32_t start_; + uint32_t end_; + +public: + + TreeNode() : start_(0), end_(0) { + } + TreeNode(const uint32_t start, const uint32_t end) : start_(start), end_(end) { + } + uint32_t getStart() const { + return start_; + } + uint32_t getEnd() const { + return end_; + } +}; + +typedef SYSvector TreeNodes; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TreeOrderElement +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TreeOrderElement { +private: + + uint32_t listIndex_; + uint32_t nodeIndex_; + +public: + + TreeOrderElement() : listIndex_(0), nodeIndex_(0) { + } + uint32_t getListIndex() const { + return listIndex_; + } + void setListIndex(const uint32_t listIndex) { + listIndex_ = listIndex; + } + uint32_t getNodeIndex() const { + return nodeIndex_; + } + void setNodeIndex(const uint32_t nodeIndex) { + nodeIndex_ = nodeIndex; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TreeOrder +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Given a sorted list, this class gives the writing order of the related binary tree. +// It also gives the node index for a given list index. +class TreeOrder : private SYSvector { +private: + + static SYSpVector orders_; + static Lock lock_; + +private: + + void setListIndex(const uint32_t nodeIndex, const uint32_t listIndex) { + (*this)[nodeIndex].setListIndex(listIndex); + (*this)[listIndex].setNodeIndex(nodeIndex); + } + +public: + + TreeOrder(const uint32_t depth); + static uint32_t depth(const uint32_t cardinality); + static const TreeOrder& get(const uint32_t cardinality); + uint32_t getListIndex(const uint32_t nodeIndex, const uint32_t n) const { + // Adjust listIndex in case of almost perfect tree (remove extra leafs). 
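+    // length() is 2^depth - 1 (the perfect tree) whereas n is the real cardinality, so
+    // last = 2 * n - length() is the highest list index whose tree slot is used as-is; beyond
+    // it, real entries occupy only every other slot of the perfect tree, and the unused leaf
+    // slots in between are folded back onto their neighbours (getNodeIndex() below applies the
+    // reverse adjustment).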
+ uint32_t listIndex = (*this)[nodeIndex].getListIndex(); + const uint32_t last = (n * 2) - length(); + if (listIndex > last) { + listIndex -= (listIndex - last) / 2; + } + return listIndex; + } + + uint32_t getNodeIndex(uint32_t listIndex, const uint32_t n) const { + // Reverse adjustment on listIndex in case of almost perfect tree. + const uint32_t last = (n * 2) - length(); + if (listIndex > last) { + listIndex = 2 * listIndex - last; + } + const uint32_t nodeIndex = (*this)[listIndex].getNodeIndex(); + return nodeIndex; + } +}; + +} + +#endif /* #ifndef _engine_treeorder_h_ */ diff --git a/storage/sparrow/engine/types.cc b/storage/sparrow/engine/types.cc new file mode 100644 index 000000000000..64567d87436c --- /dev/null +++ b/storage/sparrow/engine/types.cc @@ -0,0 +1,222 @@ +/* + Engine types. +*/ + +#include "types.h" +#include "fileutil.h" +#include "persistent.h" +#include "../handler/hasparrow.h" + +#include "../engine/log.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BlockCacheHint +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const BlockCacheHint BlockCacheHint::smallForward0_(BlockCacheHint::SMALL, BlockCacheHint::FORWARD, 0); +const BlockCacheHint BlockCacheHint::largeForward0_(BlockCacheHint::LARGE, BlockCacheHint::FORWARD, 0); +const BlockCacheHint BlockCacheHint::largeAround0_(BlockCacheHint::LARGE, BlockCacheHint::AROUND, 0); +const BlockCacheHint BlockCacheHint::largeForward1_(BlockCacheHint::LARGE, BlockCacheHint::FORWARD, 1); +const BlockCacheHint BlockCacheHint::largeBackward1_(BlockCacheHint::LARGE, BlockCacheHint::BACKWARD, 1); +const BlockCacheHint BlockCacheHint::largeAround1_(BlockCacheHint::LARGE, BlockCacheHint::AROUND, 1); +const BlockCacheHint BlockCacheHint::smallAround2_(BlockCacheHint::SMALL, BlockCacheHint::AROUND, 2); +const BlockCacheHint BlockCacheHint::mediumAround2_(BlockCacheHint::MEDIUM, BlockCacheHint::AROUND, 2); +const BlockCacheHint BlockCacheHint::largeForward2_(BlockCacheHint::LARGE, BlockCacheHint::FORWARD, 2); +const BlockCacheHint BlockCacheHint::largeBackward2_(BlockCacheHint::LARGE, BlockCacheHint::BACKWARD, 2); +const BlockCacheHint BlockCacheHint::smallForward3_(BlockCacheHint::SMALL, BlockCacheHint::FORWARD, 3); + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Str +////////////////////////////////////////////////////////////////////////////////////////////////////// + +const char* Str::empty_ = ""; + +// Timestamp is in milliseconds. +// STATIC +Str Str::fromTimestamp(const uint64_t timestamp) { + const uint32_t milliseconds = timestamp % 1000; + time_t tt = static_cast(timestamp / 1000); + struct tm t; + localtime_r(&tt, &t); + char buffer[64]; + snprintf(buffer, sizeof(buffer), "%04d/%02d/%02d %2d:%02d:%02d.%03u", 1900 + t.tm_year, t.tm_mon + 1, t.tm_mday, + t.tm_hour, t.tm_min, t.tm_sec, milliseconds); + return Str(buffer); +} + +// STATIC +Str Str::fromTimePeriod(const TimePeriod& period) { + if (period.isVoid()) { + return Str("]void["); + } + const uint64_t* low = period.getLow(); + Str sLow = low == 0 ? Str("-inf") : Str::fromTimestamp(*low); + const uint64_t* up = period.getUp(); + Str sUp = up == 0 ? Str("+inf") : Str::fromTimestamp(*up); + char buffer[128]; + snprintf(buffer, sizeof(buffer), "%s%s, %s%s", period.isLowerIncluded() ? "[" : "]", sLow.c_str(), + sUp.c_str(), period.isUpperIncluded() ? 
"]" : "["); + return Str(buffer); +} + +// Duration is in milliseconds. +// STATIC +Str Str::fromDuration(const uint64_t duration) { + char buffer[128]; + const uint32_t milliseconds = static_cast(duration % 1000); + if (duration < 1000) { + snprintf(buffer, sizeof(buffer), "%ums", milliseconds); + } else if (duration < 60000) { + if (milliseconds == 0) { + snprintf(buffer, sizeof(buffer), "%us", static_cast(duration / 1000)); + } else { + snprintf(buffer, sizeof(buffer), "%us%03ums", static_cast(duration / 1000), milliseconds); + } + } else if (duration < 3600000) { + snprintf(buffer, sizeof(buffer), "%um", static_cast(duration / 60000)); + } else if (duration < 86400000) { + const uint minutes = static_cast((duration % 3600000) / 60000); + if (minutes == 0) { + snprintf(buffer, sizeof(buffer), "%uh", static_cast(duration / 3600000)); + } else { + snprintf(buffer, sizeof(buffer), "%uh%um", static_cast(duration / 3600000), minutes); + } + } else { + const uint hours = static_cast((duration % 86400000) / 3600000); + if (hours == 0) { + snprintf(buffer, sizeof(buffer), "%ud", static_cast(duration / 86400000)); + } else { + snprintf(buffer, sizeof(buffer), "%ud%uh", static_cast(duration / 86400000), hours); + } + } + return Str(buffer); +} + +// Size is in bytes. +// STATIC +Str Str::fromSize(const uint64_t size) { + char buffer[128]; + if (size < static_cast(1024)) { + snprintf(buffer, sizeof(buffer), "%llu", static_cast(size)); + } else if (size < static_cast(1024) * 1024) { + snprintf(buffer, sizeof(buffer), "%llu KB", static_cast(size / 1024)); + } else if (size < static_cast(1024) * 1024 * 1024) { + snprintf(buffer, sizeof(buffer), "%llu MB", static_cast(size / 1024 / 1024)); + } else if (size < static_cast(1024) * 1024 * 1024 * 1024) { + snprintf(buffer, sizeof(buffer), "%.1f GB", static_cast(size) / 1024 / 1024 / 1024); + } else { + snprintf(buffer, sizeof(buffer), "%.1f TB", static_cast(size) / 1024 / 1024 / 1024 / 1024); + } + return Str(buffer); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowException +////////////////////////////////////////////////////////////////////////////////////////////////////// + +SparrowException::SparrowException(const char* text, const bool logged /* = true */, unsigned int err_code /*=0xFFFFFFFF*/ ) : logged_(logged), err_code_(err_code) { + strncpy(buffer_, text, sizeof(buffer_)-1); + buffer_[sizeof(buffer_)-1] = '\0'; +} + +// STATIC +SparrowException SparrowException::create(const bool addError, const char* format, ...) +{ + char buffer[1024]; + va_list varargs; + va_start(varargs, format); + vsnprintf(buffer, sizeof(buffer), format, varargs); + va_end(varargs); + if (addError) { + char error[1024]; +#ifdef _WIN32 + LPSTR serror = error; + if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), serror, sizeof(error), 0) == 0) { + snprintf(error, sizeof(error), "error %d", GetLastError()); + } else { // Windows adds a nasty new line char... 
+ size_t l = strlen(error) - 1; + while (error[l] == '\n' || error[l] == '\r') { + error[l--] = 0; + } + } +#else + snprintf(error, sizeof(error), "%s", strerror(errno)); +#endif + char result[2050]; + snprintf(result, sizeof(result), "%s (%s)", buffer, error); + return SparrowException(result); + } else { + return SparrowException(buffer); + } +} + +void SparrowException::toLog() const { + if (logged_) { + spw_print_error("Sparrow: %s", getText()); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// CodeStateGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifndef NDEBUG +Lock CodeStateGuard::lock_(true, "CodeStateGuard::lock_"); +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnEx +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +const char* ColumnEx::getSqlType(const ColumnType type) { + switch(type) { + case COL_BLOB: return "VARBINARY"; + case COL_BYTE: return "TINYINT"; + case COL_DOUBLE: return "DOUBLE"; + case COL_INT: return "INT"; + case COL_LONG: return "BIGINT"; + case COL_STRING: return "VARCHAR"; + case COL_TIMESTAMP: return "TIMESTAMP"; + case COL_SHORT: return "SMALLINT"; + default: + return ""; + } +} + +Str ColumnEx::getDefinition() const { + char tmp[1024]; + char* buffer = tmp; + const ColumnType type = getType(); + buffer += sprintf(buffer, "`%s` %s", getName().c_str(), getSqlType(type)); + if ( type == COL_TIMESTAMP ) { + // For timestamps, the decimal precision (0..6) is stored in the info_ field. But older client applications may still use the string size. + // Therefore, check both. + uint decimals = getStringSize() != 0 ? getStringSize() : getInfo(); + buffer += sprintf(buffer, "(%u)", decimals); + } + if (type == COL_BLOB || type == COL_STRING) { + buffer += sprintf(buffer, "(%u)", getStringSize()); + } + if (isFlagSet(COL_UNSIGNED)) { + buffer += sprintf(buffer, " UNSIGNED"); + } + if (isFlagSet(COL_NULLABLE)) { + buffer += sprintf(buffer, " NULL"); + } else { + buffer += sprintf(buffer, " NOT NULL"); + } + const Str& defaultValue = getDefaultValue(); + if (defaultValue.length() != 0) { + buffer += sprintf(buffer, " DEFAULT '%s'", defaultValue.c_str()); + } + if (isFlagSet(COL_AUTO_INC)) { + buffer += sprintf(buffer, " AUTO_INCREMENT"); + } + return Str(tmp); +} + +} diff --git a/storage/sparrow/engine/types.h b/storage/sparrow/engine/types.h new file mode 100644 index 000000000000..91a81ff444bd --- /dev/null +++ b/storage/sparrow/engine/types.h @@ -0,0 +1,710 @@ +/* + Engine types. +*/ + +#ifndef _engine_types_h_ +#define _engine_types_h_ + + +#include "interval.h" +#include "lock.h" +#include "misc.h" +#include "vec.h" +#include "hash.h" +#include "serial.h" +#include "exception.h" +#include "treeorder.h" +#include "../handler/plugin.h" // For configuration parameters. + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Column +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Column type as described in the storage adapter. +// Because of the @!*$& MySQL macros, we have to add a "COL_" suffix... 
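+// SQL equivalents (see ColumnEx::getSqlType() in types.cc): COL_BLOB -> VARBINARY,
+// COL_BYTE -> TINYINT, COL_SHORT -> SMALLINT, COL_INT -> INT, COL_LONG -> BIGINT,
+// COL_DOUBLE -> DOUBLE, COL_STRING -> VARCHAR, COL_TIMESTAMP -> TIMESTAMP.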
+enum ColumnType { + COL_BLOB, + COL_BYTE, + COL_DOUBLE, + COL_INT, + COL_LONG, + COL_STRING, + COL_TIMESTAMP, + COL_SHORT, + COL_UNKNOWN +}; + +enum ColumnFlags { + COL_NULLABLE = 1, // Column can contain NULLs. + COL_IP_ADDRESS = 2, // Column contains IP addresses. + COL_IP_LOOKUP = 4, // Column contains IP address lookups. + COL_DNS_IDENTIFIER = 8, // Column gives the DNS identifier. + COL_AUTO_INC = 16, // Column is auto incremental. + COL_UNSIGNED = 32 // Column values are unsigned. +}; + +enum FieldType { + FIELD_NONE, + FIELD_NORMAL, // Field returns value read from record. + FIELD_DEFAULT, // Field returns column's default value. + FIELD_SKIP // Field skips value in record. +}; + +// Describes a table column. +class Column { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Column& column); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Column& column); + +private: + + Str name_; + Str charset_; + ColumnType type_; + uint32_t flags_; // Flags. See ColumnFlags. + uint32_t info_; // Additional info; meaning depends on flags_. + uint32_t serial_; // Alteration serial number of creation. + uint32_t dropSerial_; // Alteration serial number of drop. + Str defaultValue_; // Default value. + +public: + + Column() : type_(COL_UNKNOWN), flags_(0), info_(0), serial_(0), dropSerial_(0) { + } + + Column(const Str& name) : name_(name), type_(COL_UNKNOWN), flags_(0), info_(0), serial_(0), dropSerial_(0) { + } + + Column(const char* name, const ColumnType type, const uint32_t flags, const uint32_t info, const char* charset, const Str& defaultValue) + : name_(name), charset_(charset), type_(type), flags_(flags), info_(info), serial_(0), dropSerial_(0), defaultValue_(defaultValue) { + } + + ~Column() { + } + const Str& getName() const { + return name_; + } + ColumnType getType() const { + return type_; + } + bool isString() const { + return type_ == COL_BLOB || type_ == COL_STRING; + } + uint32_t getFlags() const { + return flags_; + } + bool isFlagSet(const ColumnFlags flag) const { + return (flags_ & flag) != 0; + } + void addFlag(const ColumnFlags flag) { + flags_ |= flag; + } + uint32_t getInfo() const { + return info_; + } + void setInfo(const uint32_t info) { + info_ = info; + } + uint32_t getSerial() const { + return serial_; + } + void setSerial(const uint32_t serial) { + serial_ = serial; + } + uint32_t getDropSerial() const { + return dropSerial_; + } + bool isDropped() const { + return getDropSerial() != 0; + } + void drop(const uint32_t serial) { + assert(dropSerial_ == 0); + assert(serial > getSerial()); + dropSerial_ = serial; + } + const Str& getDefaultValue() const { + return defaultValue_; + } + uint32_t getDataSize() const { + switch (getType()) { + case COL_BLOB: return 16; + case COL_BYTE: return 1; + case COL_SHORT: return 2; + case COL_DOUBLE: return 8; + case COL_INT: return 4; + case COL_LONG: return 8; + case COL_STRING: return 16; + case COL_TIMESTAMP: return 8; + default: assert(0); return 0; + } + } + uint32_t getBits() const { + if (isString()) { + return isFlagSet(COL_NULLABLE) ? 6 : 5; + } else { + return isFlagSet(COL_NULLABLE) ? 1 : 0; + } + } + const Str& getCharset() const { + return charset_; + } + + // To check if new table definition is compatible with existing one. + // - Column size does not matter. + // - Unsigned flag does not matter. + // Note the name can be empty when upgrading from older versions. 
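+  // For example, an existing `INT NOT NULL` column is considered compatible with a new
+  // `INT UNSIGNED NOT NULL` definition (type and nullability match, signedness and size are
+  // ignored), but not with an `INT NULL` one.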
+ bool operator == (const Column& right) const { + return type_ == right.type_ + && (flags_ & COL_NULLABLE) == (right.flags_ & COL_NULLABLE) + && (name_.length() == 0 || right.name_.length() == 0 || name_ == right.name_); + } + bool operator != (const Column& right) const { + return !(*this == right); + } +}; + +typedef SYSvector Columns; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, Column& column) { + if (buffer.getVersion() >= 6) { + buffer >> column.name_; + } + if (buffer.getVersion() == 1) { + Str dummy; + buffer >> dummy; + } + int type; + buffer >> type; + column.type_ = static_cast(type); + if (buffer.getVersion() < 7) { + uint32_t size; + buffer >> size; + } + buffer >> column.flags_ >> column.info_ >> column.charset_; + if (buffer.getVersion() >= 16) { + buffer >> column.serial_; + if (buffer.getVersion() >= 19) { + buffer >> column.dropSerial_; + } else { + bool dummy; + buffer >> dummy; + } + } + if (buffer.getVersion() >= 18) { + buffer >> column.defaultValue_; + } + if (buffer.getVersion() < 24 && type == COL_TIMESTAMP) { + column.info_ = UINT_MAX; + } + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const Column& column) { + buffer << column.name_ << static_cast(column.type_) << column.flags_ << column.info_ + << column.charset_ << column.serial_ << column.dropSerial_ << column.defaultValue_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnEx +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ColumnEx : public Column { + friend ByteBuffer& operator >> (ByteBuffer& buffer, ColumnEx& column); + +private: + + uint32_t stringSize_; + +public: + + ColumnEx() : Column() { + } + + ~ColumnEx() { + } + + uint32_t getStringSize() const { + return stringSize_; + } + + static const char* getSqlType(const ColumnType type); + + Str getDefinition() const; +}; + +typedef SYSvector ColumnExs; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, ColumnEx& column) { + buffer >> static_cast(column); + buffer >> column.stringSize_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Index +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Describes a table index. 
+typedef SYSvector IndexIds; +typedef SYSvector ColumnIds; +typedef SYSvector ColumnPos; +typedef SYSvector ColumnIdsArray; +class Index { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Index& index); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Index& index); + +private: + + Str name_; + ColumnIds columns_; + bool unique_; + bool dropped_; + +public: + + Index() : unique_(false), dropped_(false) { + } + + Index(const char* name, const ColumnIds& columns, bool unique) + : name_(name), columns_(columns), unique_(unique), dropped_(false) { + } + + Index(const Index& right) { + *this = right; + } + + Index& operator = (const Index& right) = default; + + bool operator == (const Index& right) const { + return dropped_ == right.dropped_ && unique_ == right.unique_ && columns_ == right.columns_; + } + + const Str& getName() const { + return name_; + } + + void setName(const Str& name) { + name_ = name; + } + + const ColumnIds& getColumnIds() const { + return columns_; + } + + bool isUnique() const { + return unique_; + } + + bool isDropped() const { + return dropped_; + } + + void drop() { + dropped_ = true; + } +}; + +typedef SYSvector Indexes; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, Index& index) { + uint32_t id; + const uint32_t version = buffer.getVersion(); + if (version < 11) { + buffer >> id; + } + if (version < 6) { + char buff[16]; + snprintf(buff, sizeof(buff), "index_%u", id); + index.name_ = Str(buff); + } else { + buffer >> index.name_; + } + buffer >> index.columns_ >> index.unique_; + if (version < 12) { + index.dropped_ = index.columns_.isEmpty(); + } else { + buffer >> index.dropped_; + } + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const Index& index) { + buffer << index.name_ << index.columns_ << index.unique_ << index.dropped_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Alteration +////////////////////////////////////////////////////////////////////////////////////////////////////// + +enum AlterationType { + ALT_UNKNOWN, + ALT_ADD_INDEX, + ALT_DROP_INDEX +}; + +class AlterationStats { +private: + + int64_t deltaDataSize_; + int64_t deltaIndexSize_; + +public: + + AlterationStats() : deltaDataSize_(0), deltaIndexSize_(0) { + } + + AlterationStats(const int64_t deltaDataSize, const int64_t deltaIndexSize) + : deltaDataSize_(deltaDataSize), deltaIndexSize_(deltaIndexSize) { + } + + AlterationStats& operator += (const AlterationStats& right) { + Atomic::add64(reinterpret_cast(&deltaDataSize_), right.deltaDataSize_); + Atomic::add64(reinterpret_cast(&deltaIndexSize_), right.deltaIndexSize_); + return *this; + } + + int64_t getDeltaDataSize() const { + return deltaDataSize_; + } + + int64_t getDeltaIndexSize() const { + return deltaIndexSize_; + } +}; + +class Master; +class PersistentPartition; +class PrintBuffer; +class Alteration { + friend ByteBuffer& operator >> (ByteBuffer& buffer, Alteration& alteration); + friend ByteBuffer& operator << (ByteBuffer& buffer, const Alteration& alteration); + +private: + + AlterationType type_; + uint32_t serial_; + uint32_t id_; + +public: + + Alteration() : type_(ALT_UNKNOWN), serial_(0), id_(0) { + } + + Alteration(const AlterationType type, const uint32_t serial, const uint32_t id) + : type_(type), serial_(serial), id_(id) { + } + + Alteration(const Alteration& right) { + *this = right; + } + + Alteration& operator = (const Alteration& right) = default; + + AlterationType getType() const 
{ + return type_; + } + + uint32_t getSerial() const { + return serial_; + } + + uint32_t getId() const { + return id_; + } + + Str getDescription(const Master& master) const; +}; + +typedef SYSvector Alterations; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, Alteration& alteration) { + int type; + buffer >> type; + alteration.type_ = static_cast(type); + buffer >> alteration.serial_ >> alteration.id_; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const Alteration& alteration) { + buffer << static_cast(alteration.type_) << alteration.serial_ << alteration.id_; + return buffer; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ForeignKey +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Describes a table foreign key. +class ForeignKey { + friend ByteBuffer& operator >> (ByteBuffer& buffer, ForeignKey& foreignKey); + friend ByteBuffer& operator << (ByteBuffer& buffer, const ForeignKey& foreignKey); + +private: + + Str name_; + int columnId_; + Str databaseName_; + Str tableName_; + Str columnName_; + +public: + + ForeignKey() : columnId_(0) { + } + + ForeignKey(const char* name, int columnId, const Str& databaseName, const Str& tableName, + const Str& columnName) + : name_(name), columnId_(columnId), databaseName_(databaseName), + tableName_(tableName), columnName_(columnName) { + } + + ForeignKey(const ForeignKey& right) { + *this = right; + } + + ForeignKey& operator = (const ForeignKey& right) = default; + + const Str& getName() const { + return name_; + } + + int getColumnId() const { + return columnId_; + } + + const Str& getDatabaseName() const { + return databaseName_; + } + + const Str& getTableName() const { + return tableName_; + } + + const Str& getColumnName() const { + return columnName_; + } + + bool operator == (const ForeignKey& right) const { + return name_ == right.name_ && columnId_ == right.columnId_ && databaseName_ == right.databaseName_ + && tableName_ == right.tableName_ && columnName_ == right.columnName_; + } +}; + +typedef SYSvector ForeignKeys; + +inline ByteBuffer& operator >> (ByteBuffer& buffer, ForeignKey& foreignKey) { + buffer >> foreignKey.name_ >> foreignKey.columnId_ >> foreignKey.databaseName_ + >> foreignKey.tableName_>> foreignKey.columnName_; + return buffer; +} + +inline ByteBuffer& operator << (ByteBuffer& buffer, const ForeignKey& foreignKey) { + buffer << foreignKey.name_ << foreignKey.columnId_ << foreignKey.databaseName_ + << foreignKey.tableName_ << foreignKey.columnName_; + return buffer; +} + +// Time period: milliseconds since epoch (1970). +typedef Interval TimePeriod; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileSection +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileSection { + friend ByteBuffer& operator << (ByteBuffer& buffer, const FileSection& section); + friend ByteBuffer& operator >> (ByteBuffer& buffer, FileSection& section); + +private: + + uint64_t offset_; // Offset in file. + uint64_t size_; // Size, in bytes. 
+ +public: + + FileSection() : offset_(0), size_(0) { + } + + FileSection(const uint64_t offset, const uint64_t size) : offset_(offset), size_(size) { + } + + void setOffset(uint64_t offset) { + offset_ = offset; + } + + uint64_t getOffset() const { + return offset_; + } + + void setSize(uint64_t size) { + size_ = size; + } + + uint64_t getSize() const { + return size_; + } + + uint64_t getCount(const char* name, const uint32_t size) const _THROW_(SparrowException) { + if (size == 0 || (size_ % size) != 0) { + throw SparrowException::create(false, "size of section \"%s\" (%llu) is not a multiple of record size (%u)", name, static_cast(size_), size); + } + return size_ / size; + } + + bool contains(const uint64_t offset) const { + return offset >= offset_ && offset < offset_ + size_; + } +}; + +inline ByteBuffer& operator << (ByteBuffer& buffer, const FileSection& section) { + buffer << section.offset_ << section.size_; + return buffer; +} + +inline ByteBuffer& operator >> (ByteBuffer& buffer, FileSection& section) { + buffer >> section.offset_ >> section.size_; + return buffer; +} + +#define DATA_FILE UINT_MAX +#define STRING_FILE (UINT_MAX - 1) + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FileHeaderBase +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FileReader; +class FileHeaderBase { +public: + + virtual ~FileHeaderBase() { + } + + virtual uint64_t getStart() const = 0; + + virtual uint64_t getEnd() const = 0; + + virtual uint64_t getRecords() const = 0; + + virtual uint32_t getRecordSize() const = 0; + + virtual bool isTreeComplete() const = 0; + + virtual uint32_t getNodes() const = 0; + + virtual uint32_t getMinNode() const = 0; + + virtual uint32_t getMaxNode() const = 0; + + virtual uint32_t getPrevNode(const uint32_t node) const = 0; + + virtual uint32_t getNextNode(const uint32_t node) const = 0; + + virtual uint64_t seekTree(FileReader& reader, const uint64_t node) const = 0; + + virtual uint64_t seekTreeData(FileReader& reader, const uint64_t node) const = 0; + + virtual uint64_t seekRecord(FileReader& reader, const uint64_t record) const = 0; + + virtual uint64_t seekRecordData(FileReader& reader, const uint64_t record) const = 0; + + virtual uint64_t seekBin(FileReader& reader, const uint64_t offset) const = 0; + + virtual const FileSection& getStringsSection() const = 0; + + virtual uint64_t getTotalSize() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BlockCacheHint +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Hint to initialize a FileBlock from disk. +class BlockCacheHint { +public: + + enum Size { + SMALL, + MEDIUM, + LARGE + }; + + enum Direction { + FORWARD, + BACKWARD, + AROUND + }; + + static const BlockCacheHint smallForward0_; + static const BlockCacheHint largeForward0_; + static const BlockCacheHint largeAround0_; + static const BlockCacheHint largeForward1_; + static const BlockCacheHint largeBackward1_; + static const BlockCacheHint largeAround1_; + static const BlockCacheHint smallAround2_; + static const BlockCacheHint mediumAround2_; + static const BlockCacheHint largeForward2_; + static const BlockCacheHint largeBackward2_; + static const BlockCacheHint smallForward3_; + +private: + + const Size size_; // Block size. + const Direction direction_; // Read direction. 
+ const uint32_t level_; // Cache level (0..3). + +private: + + BlockCacheHint(const BlockCacheHint&); + BlockCacheHint& operator = (const BlockCacheHint&); + + BlockCacheHint(); + + BlockCacheHint(const Size size, const Direction direction, const uint32_t level) + : size_(size), direction_(direction), level_(level) { + } + +public: + + Size getSize() const { + return size_; + } + + Direction getDirection() const { + return direction_; + } + + uint32_t getLevel() const { + return level_; + } + + uint32_t getReadBlockSize() const { + switch (getSize()) { + case SMALL: + return sparrow_small_read_block_size; + case MEDIUM: + return sparrow_medium_read_block_size; + case LARGE: + return sparrow_large_read_block_size; + default: + assert(0); + return 0; + } + } + + bool operator == (const BlockCacheHint& right) const { + return size_ == right.size_ && direction_ == right.direction_ && level_ == right.level_; + } + + bool operator != (const BlockCacheHint& right) const { + return !(*this == right); + } +}; + +} + +#endif /* #ifndef _engine_types_h_ */ + diff --git a/storage/sparrow/engine/vec.h b/storage/sparrow/engine/vec.h new file mode 100644 index 000000000000..4e3b28fc5190 --- /dev/null +++ b/storage/sparrow/engine/vec.h @@ -0,0 +1,1481 @@ +/* + Vector types. + */ + +#ifndef _engine_vec_h_ +#define _engine_vec_h_ + +#include + +#include "list.h" + +namespace Sparrow { + +// Constant for "not found". +#ifndef SYS_NPOS +#define SYS_NPOS (~(static_cast(0))) +#endif + +// +// Default allocator for vectors +// +template class SYSallocator { +private: + + uint32_t capacity_; + +public: + + SYSallocator() : capacity_(0) { + } + uint32_t getCount() const; + void resetCount(); + T* build(uint32_t n); + void destroy(T* p); +}; + +template inline uint32_t SYSallocator::getCount() const { + return capacity_; +} + +template inline void SYSallocator::resetCount() { + capacity_ = 0; +} + +template inline T* SYSallocator::build(uint32_t n) { + capacity_ = n; + return new T[n]; +} + +template inline void SYSallocator::destroy(T* p) { + delete[] p; +} + +// +// SYSarray: simple array +// +template > class SYSarray : public A { +public: + + SYSarray(const uint32_t size = 0); + SYSarray(const uint32_t size, const T& init); + SYSarray(const SYSarray& right); + ~SYSarray(); + + // accessors + uint32_t length() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T* data() const { + return array_; + } + T* data() { + return array_; + } + + // operations + void clear(); + void reshape(const uint32_t n, const bool doCopy = true); + + // operators + SYSarray& operator =(const SYSarray& right); + bool operator ==(const SYSarray& right) const; + +protected: + + void copy(T* destination, const T* source, uint32_t n); + +protected: + + T* array_; +}; + +template inline void SYSarray::copy(T* destination, const T* source, uint32_t n) { + assert(source != destination && n > 0); + + // in case arrays overlap + if (destination < source) { + while (n-- > 0) { + *destination++ = *source++; + } + } else { + destination += n; + source += n; + while (n-- > 0) { + *--destination = *--source; + } + } +} + +template inline uint32_t SYSarray::length() const { + return (array_ == 0 ? 
0 : this->getCount()); +} + +template inline void SYSarray::reshape(const uint32_t n, const bool doCopy /* = true */) { + const uint32_t l = length(); + if (n == 0) { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + } + array_ = 0; + } else if (n != l) { + T* newArray = this->build(n); + if (array_ != 0) { + if (doCopy && l > 0) { + copy(newArray, array_, l > n ? n : l); + } + this->destroy(array_); + } + array_ = newArray; + } +} + +// constructors +template inline SYSarray::SYSarray(const uint32_t size /* = 0 */) : A() { + array_ = 0; + reshape(size, false); +} + +template inline SYSarray::SYSarray(const uint32_t size, const T& init) : A() { + array_ = 0; + reshape(size, false); + for (uint32_t i = 0; i < size; ++i) { + array_[i] = init; + } +} + +template inline const T& SYSarray::operator [](const uint32_t index) const { + assert(index < length()); + return array_[index]; +} + +template inline T& SYSarray::operator [](const uint32_t index) { + assert(index < length()); + return array_[index]; +} + +template inline void SYSarray::clear() { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + } +} + +template inline SYSarray::~SYSarray() { + clear(); +} + +template inline SYSarray& SYSarray::operator =(const SYSarray& right) { + if (this == &right) { + return *this; + } + const uint32_t l = right.length(); + reshape(l, false); + if (l > 0) { + copy(array_, right.array_, l); + } + return *this; +} + +template inline bool SYSarray::operator ==(const SYSarray& right) const { + if (length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +// copy constructor +template inline SYSarray::SYSarray(const SYSarray& right) : A() { + array_ = 0; + *this = right; +} + +// +// SYSvector: simple vector +// note: SYSvector inherits from the allocator to perform empty base optimization (EBO) +// +template > class SYSvector: public A { +public: + + SYSvector(const uint32_t size = 0); + SYSvector(const SYSvector& right); + ~SYSvector(); + + // accessors + uint32_t entries() const; + bool isEmpty() const; + uint32_t length() const; + uint32_t capacity() const; + const T& operator [](const uint32_t index) const; + T& operator [](const uint32_t index); + const T& first() const; + T& first(); + const T& last() const; + T& last(); + uint32_t index(const T& t) const; + bool contains(const T& t) const; + const T* data() const; + + // operations + void insertAt(const uint32_t index, const T& t); + void removeAt(const uint32_t index); + void removeFirst(); + void removeLast(); + bool remove(const T& t); + void append(const T& t); + void append(const SYSvector& right); + void insert(const T& t); + void resize(const uint32_t n, const bool canShrink = false, const bool doCopy = true); + void reshape(const uint32_t n, const bool doCopy = true); + void clear(); + void forceLength(const uint32_t length) { + if (length <= capacity()) { + n_ = length; + } + } + bool contains(const SYSvector& vect) const; + bool containsTheSame(const SYSvector& vect) const; + + // operators + SYSvector& operator =(const SYSvector& right); + bool operator ==(const SYSvector& right) const; + +protected: + + void copy(T* destination, const T* source, uint32_t n); + +protected: + + T* array_; + uint32_t n_; +}; + +// Returns true if this all the items from this vector also exist in the the vector passed as argument. 
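contains(vect) and containsTheSame(vect) give a membership-based comparison: two vectors of equal length compare equal when every element of one is found somewhere in the other, regardless of position (for duplicate-free contents this amounts to set equality). The same idea over std::vector, as a rough standalone equivalent rather than the engine's implementation:

// Standalone equivalent of SYSvector::contains()/containsTheSame():
// equal length plus membership of every element, ignoring order.
#include <algorithm>
#include <vector>

template <typename T>
bool containsAll(const std::vector<T>& haystack, const std::vector<T>& needles) {
  for (const T& n : needles) {
    if (std::find(haystack.begin(), haystack.end(), n) == haystack.end()) return false;
  }
  return true;
}

template <typename T>
bool containsTheSame(const std::vector<T>& a, const std::vector<T>& b) {
  return a.size() == b.size() && containsAll(b, a);
}

// containsTheSame({1, 2, 3}, {3, 1, 2}) == true; order does not matter.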
+template inline bool SYSvector::contains(const SYSvector& vect) const { + for (uint32_t i = 0; i < length(); ++i) { + if (!(vect.contains((*this)[i]))) { + return false; + } + } + return true; +} + +// Returns true if the two vectors contain the same items, independent of their position in the vector. +template inline bool SYSvector::containsTheSame(const SYSvector& vect) const { + if (length() != vect.length()) { + return false; + } + return contains(vect); +} + +template inline void SYSvector::copy(T* destination, const T* source, uint32_t n) { + assert(source != destination); + + // in case arrays overlap + if (destination < source) { + while (n-- > 0) { + *destination++ = *source++; + } + } else { + destination += n; + source += n; + while (n-- > 0) { + *--destination = *--source; + } + } +} + +template inline uint32_t SYSvector::capacity() const { + return (array_ == 0 ? 0 : this->getCount()); +} + +template inline void SYSvector::resize(const uint32_t n, bool canShrink /* = false */, bool doCopy /* = true */) { + // cannot shrink under the number of elements, unless specified + if (!canShrink && n < n_) + return; + if (n == 0) { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + } + array_ = 0; + } else if (n != capacity()) { + T* newArray = this->build(n); + if (array_ != 0) { + if (doCopy && n_ > 0) { + copy(newArray, array_, n_ > n ? n : n_); + } + this->destroy(array_); + } + array_ = newArray; + } +} + +template inline void SYSvector::reshape(const uint32_t n, const bool doCopy /* = true */) { + // shrinking allowed + resize(n, true, doCopy); + n_ = n; +} + +template inline void SYSvector::insertAt(const uint32_t index, const T& t) { + assert(index <= n_); + uint32_t size = capacity(); + T* dest = array_; + assert(size >= n_); + if (size == n_) { + T* newArray = this->build(n_ + (G == 0 ? 
1 : G)); + dest = newArray; + if (index > 0) { + copy(dest, array_, index); + } + } + if (n_ > index) { + copy(dest + index + 1, array_ + index, n_ - index); + } + dest[index] = t; + n_++; + if (dest != array_) { + if (array_ != 0) { + this->destroy(array_); + } + array_ = dest; + } +} + +template inline void SYSvector::append(const T& t) { + this->insertAt(n_, t); +} + +template inline void SYSvector::append(const SYSvector& right) { + resize(this->entries() + right.entries()); + for (uint i=0; i inline void SYSvector::insert(const T& t) { + this->insertAt(n_, t); +} + +template inline void SYSvector::removeAt(const uint32_t index) { + assert(index < n_); + if (n_ == 1 && G == 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + n_ = 0; + } else { + T* dest = array_; + if (G == 0) { + T* newArray = this->build(n_ - 1); + dest = newArray; + if (index > 0) { + copy(dest, array_, index); + } + } + n_--; + if (n_ > index) { + copy(dest + index, array_ + index + 1, n_ - index); + } + if (dest != array_) { + this->destroy(array_); + array_ = dest; + } else { + array_[n_] = T(); + } + } +} + +template inline void SYSvector::removeFirst() { + removeAt(0); +} + +template inline void SYSvector::removeLast() { + assert(n_ > 0); + removeAt(n_ - 1); +} + +// constructor +template inline SYSvector::SYSvector(const uint32_t size /* = 0 */) { + array_ = 0; + n_ = 0; + resize(size); +} + +template inline uint32_t SYSvector::entries() const { + return n_; +} + +template inline bool SYSvector::isEmpty() const { + return n_ == 0; +} + +template inline uint32_t SYSvector::length() const { + return n_; +} + +template inline const T& SYSvector::operator [](const uint32_t index) const { + assert(index < n_); + return array_[index]; +} + +template inline T& SYSvector::operator [](const uint32_t index) { + assert(index < n_); + return array_[index]; +} + +template inline void SYSvector::clear() { + if (array_ != 0) { + this->destroy(array_); + this->resetCount(); + array_ = 0; + n_ = 0; + } +} + +template inline SYSvector::~SYSvector() { + clear(); +} + +template inline SYSvector& SYSvector::operator =(const SYSvector& right) { + if (this == &right) { + return *this; + } + clear(); + resize(right.capacity()); + const uint32_t l = right.length(); + if (l > 0) { + copy(array_, right.array_, l); + } + n_ = l; + return *this; +} + +template inline bool SYSvector::operator ==(const SYSvector& right) const { + if (length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +// copy constructor +template inline SYSvector::SYSvector(const SYSvector& right) : A() { + array_ = 0; + n_ = 0; + *this = right; +} + +template inline const T& SYSvector::first() const { + return (*this)[0]; +} + +template inline T& SYSvector::first() { + return (*this)[0]; +} + +template inline const T& SYSvector::last() const { + assert(n_ > 0); + return (*this)[n_ - 1]; +} + +template inline T& SYSvector::last() { + assert(n_ > 0); + return (*this)[n_ - 1]; +} + +template inline uint32_t SYSvector::index(const T& t) const { + for (uint32_t i = 0; i < n_; ++i) { + if (array_[i] == t) { + return i; + } + } + return SYS_NPOS; +} + +template inline bool SYSvector::remove(const T& t) { + const uint32_t i = index(t); + const bool found = (i != SYS_NPOS); + if (found) { + removeAt(i); + } + return found; +} + +template inline bool SYSvector::contains(const T& t) const { + return (index(t) != SYS_NPOS); +} + +template inline 
const T* SYSvector::data() const { + return array_; +} + +// +// SYSpVector: vector of pointers +// +template > class SYSpVector: public SYSvector { +public: + + SYSpVector(const uint32_t size = 0) : SYSvector(size) { + } + + // accessors + T* find(const T* t) const; + uint32_t index(const T* t) const; + bool contains(const T* t) const; + + // operations + T* remove(const T* t); + void clearAndDestroy(); + + // operators + bool operator ==(const SYSpVector& right) const; +}; + +template inline uint32_t SYSpVector::index(const T* t) const { + for (uint32_t i = 0; i < this->length(); ++i) { + T* v = (*this)[i]; + if (*v == *t) { + return i; + } + } + return SYS_NPOS; +} + +template inline T* SYSpVector::find(const T* t) const { + const uint32_t i = index(t); + if (i == SYS_NPOS) { + return 0; + } else { + return (*this)[i]; + } +} + +template inline bool SYSpVector::contains(const T* t) const { + return (index(t) != SYS_NPOS); +} + +template inline T* SYSpVector::remove(const T* t) { + T* result = 0; + for (uint32_t i = 0; i < this->length(); ++i) { + T* v = (*this)[i]; + if (*v == *t) { + result = (*this)[i]; + this->removeAt(i); + break; + } + } + return result; +} + +template inline void SYSpVector::clearAndDestroy() { + for (uint32_t i = 0; i < this->length(); ++i) { + delete (*this)[i]; + } + this->clear(); +} + +template inline bool SYSpVector::operator ==(const SYSpVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!(*(*this)[i] == *right[i])) { + return false; + } + } + return true; +} + +// +// SYSsortedVector: sorted vector +// +template > class SYSsortedVector: public SYSvector { +public: + + SYSsortedVector(const uint32_t size = 0) : SYSvector(size) { + } + + SYSsortedVector& operator = (const SYSvector& right); + SYSsortedVector(const SYSvector& right); + + // accessors + uint32_t index(const T& t) const; + bool contains(const T& t) const; + + // operations + void insert(const T& t); + bool remove(const T& t); + bool insertIfAbsent(const T& t); + + // operators + bool operator ==(const SYSsortedVector& right) const; + + bool bsearch(const T& t, uint32_t& index, const int mode) const; + +#ifndef NDEBUG + bool isSorted() const; +#endif +}; + +template inline bool SYSsortedVector::bsearch(const T& t, uint32_t& index, const int mode) const { + // mode = 0: check if object exists + // mode = 1: find first occurrence + // mode = 2: find for insertion + bool result = false; + index = 0; + if (this->n_ > 0) { + uint32_t top = this->n_ - 1; + uint32_t bottom = 0; + while (top > bottom) { + index = (top + bottom) >> 1; + const T& v = (*this)[index]; + if (t == v) { + result = true; + break; + } else if (t < v) { + top = index ? 
index - 1 : 0; + } else { + bottom = index + 1; + } + } + if (!result) { + index = bottom; + if (t == (*this)[index]) { + result = true; + } + } + if (result) { + if (mode == 1) { + // go down to the first one + while (index > 0 && t == (*this)[index - 1]) { + index--; + } + } else if (mode == 2) { + // found; move up to the insertion position + index++; + while (index < this->n_ && t == (*this)[index]) { + index++; + } + } + } else { + if (mode == 2) { + // not found; move up to the insertion position + while (index < this->n_ && (*this)[index] < t) { + index++; + } + } + } + } + return result; +} + +template inline uint32_t SYSsortedVector::index(const T& t) const { + assert(isSorted()); + uint32_t index; + if (bsearch(t, index, 1)) { + return index; + } else { + return SYS_NPOS; + } +} + +template inline bool SYSsortedVector::contains(const T& t) const { + assert(isSorted()); + uint32_t index; + return bsearch(t, index, 0); +} + +template inline void SYSsortedVector::insert(const T& t) { + assert(isSorted()); + uint32_t index; + bsearch(t, index, 2); + this->insertAt(index, t); + assert(isSorted()); +} + +template inline bool SYSsortedVector::insertIfAbsent(const T& t) { + assert(isSorted()); + uint32_t index; + if (!bsearch(t, index, 2)) { + this->insertAt(index, t); + assert(isSorted()); + return true; + } else { + return false; + } +} + +template inline SYSsortedVector& SYSsortedVector::operator = (const SYSvector& right) { + SYSvector::clear(); + const uint32_t n = right.entries(); + SYSvector::resize(n, false, false); + for (uint32_t i = 0; i < n; ++i) { + insert(right[i]); + } + return *this; +} + +template inline SYSsortedVector::SYSsortedVector(const SYSvector& right) { + *this = right; +} + +template inline bool SYSsortedVector::remove(const T& t) { + assert(isSorted()); + uint32_t index; + if (bsearch(t, index, 1)) { + this->removeAt(index); + assert(isSorted()); + return true; + } else { + return false; + } +} + +template inline bool SYSsortedVector::operator ==(const SYSsortedVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!((*this)[i] == right[i])) { + return false; + } + } + return true; +} + +#ifndef NDEBUG +template inline bool SYSsortedVector::isSorted() const { + if (this->n_ < 2) { + return true; + } + for (uint32_t index = 0; index < this->n_ - 1; ++index) { + if (!((*this)[index] < (*this)[index + 1]) && !((*this)[index] == (*this)[index + 1])) { + return false; + } + } + return true; +} +#endif + +// +// SYSpSortedVector: sorted vector of pointers +// +template > class SYSpSortedVector: public SYSvector { +public: + + SYSpSortedVector(const uint32_t size = 0) : SYSvector(size) { + } + + // accessors + uint32_t index(const T* t) const; + bool contains(const T* t) const; + T* find(const T* t) const; + + // operations + void insert(T* t); + T* remove(const T* t); + void clearAndDestroy(); + + // operators + bool operator ==(const SYSpSortedVector& right) const; + + bool bsearch(const T& t, uint32_t& index, const int mode) const; + +#ifndef NDEBUG + bool isSorted() const; +#endif +}; + +template inline bool SYSpSortedVector::bsearch(const T& t, uint32_t& index, const int mode) const { + // mode = 0: check if object exists + // mode = 1: find first occurrence + // mode = 2: find for insertion + bool result = false; + index = 0; + if (this->n_ > 0) { + uint32_t top = this->n_ - 1; + uint32_t bottom = 0; + while (top > bottom) { + index = (top + bottom) >> 1; + const T& v = 
*((*this)[index]); + if (t == v) { + result = true; + break; + } else if (t < v) { + top = index ? index - 1 : 0; + } else { + bottom = index + 1; + } + } + if (!result) { + index = bottom; + if (t == *((*this)[index])) { + result = true; + } + } + if (result) { + if (mode == 1) { + // go down to the first one + while (index > 0 && t == *((*this)[index - 1])) { + index--; + } + } else if (mode == 2) { + // found; move up to the insertion position + index++; + while (index < this->n_ && t == *((*this)[index])) { + index++; + } + } + } else if (mode == 2) { + // not found; move up to the insertion position + while (index < this->n_ && *((*this)[index]) < t) { + index++; + } + } + } + return result; +} + +template inline uint32_t SYSpSortedVector::index(const T* t) const { + assert(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + return index; + } else { + return SYS_NPOS; + } +} + +template inline bool SYSpSortedVector::contains(const T* t) const { + assert(isSorted()); + uint32_t index; + return bsearch(*t, index, 0); +} + +template inline T* SYSpSortedVector::find(const T* t) const { + assert(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + return (*this)[index]; + } else { + return 0; + } +} + +template inline void SYSpSortedVector::insert(T* t) { + assert(isSorted()); + uint32_t index; + bsearch(*t, index, 2); + this->insertAt(index, t); + assert(isSorted()); +} + +template inline T* SYSpSortedVector::remove(const T* t) { + assert(isSorted()); + uint32_t index; + if (bsearch(*t, index, 1)) { + T* result = (*this)[index]; + this->removeAt(index); + assert(isSorted()); + return result; + } else { + return 0; + } +} + +template inline void SYSpSortedVector::clearAndDestroy() { + uint32_t i = 0; + for (i = 0; i < this->n_; ++i) { + delete (*this)[i]; + } + this->clear(); +} + +template inline bool SYSpSortedVector::operator ==(const SYSpSortedVector& right) const { + if (this->length() != right.length()) { + return false; + } + for (uint32_t i = 0; i < this->length(); ++i) { + if (!(*(*this)[i] == *right[i])) { + return false; + } + } + return true; +} + +#ifndef NDEBUG +template inline bool SYSpSortedVector::isSorted() const { + if (this->n_ < 2) { + return true; + } + for (uint32_t index = 0; index < this->n_ - 1; ++index) { + if (!(*((*this)[index]) < *((*this)[index + 1])) && !(*((*this)[index]) == *((*this)[index + 1]))) { + return false; + } + } + return true; +} +#endif + +// +// SYSlarray: single linked list of arrays. 
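Both sorted-vector variants funnel their lookups through the same three-mode bsearch(): mode 0 only asks whether an equal element exists, mode 1 returns the index of the first equal element, and mode 2 returns the position just past any run of equal elements, i.e. where a new value should be inserted to keep the vector sorted. The standard-library equivalents of the three modes, as a sketch of the semantics rather than the engine's exact routine:

// The three bsearch() modes expressed with the standard algorithms on a
// sorted std::vector<int>.
#include <algorithm>
#include <cstddef>
#include <vector>

// mode 0: does an equal element exist?
bool existsSorted(const std::vector<int>& v, int t) {
  return std::binary_search(v.begin(), v.end(), t);
}

// mode 1: index of the first equal element (v.size() if absent).
std::size_t firstIndex(const std::vector<int>& v, int t) {
  auto it = std::lower_bound(v.begin(), v.end(), t);
  return (it != v.end() && *it == t) ? static_cast<std::size_t>(it - v.begin()) : v.size();
}

// mode 2: insertion position that keeps the vector sorted, after any equal run.
std::size_t insertIndex(const std::vector<int>& v, int t) {
  return static_cast<std::size_t>(std::upper_bound(v.begin(), v.end(), t) - v.begin());
}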
+// +template > class SYSlarray: private SYSslist, public A { +private: + + uint32_t length_; + +public: + + SYSlarray() : length_(0) { + } + + ~SYSlarray(); + + const T& operator[](const uint32_t index) const; + + T& operator[](const uint32_t index); + + void append(const T& t); + + T removeLast(); + + void clear(); + + uint32_t length() const; +}; + +template inline const T& SYSlarray::operator[](const uint32_t index) const { + assert(index < length_); + SYSslistIterator iterator(*const_cast*>(static_cast*>(this))); + const uint32_t pos = index / G; + uint32_t i = 0; + while (i++ <= pos && ++iterator) { + } + T* a = iterator.key(); + return a[index % G]; +} + +template inline T& SYSlarray::operator[](const uint32_t index) { + assert(index < length_); + SYSslistIterator iterator(*this); + const uint32_t pos = index / G; + uint32_t i = 0; + while (i++ <= pos && ++iterator) { + } + T* a = iterator.key(); + return a[index % G]; +} + +template inline void SYSlarray::append(const T& t) { + T* a; + if (length_ % G == 0) { + a = this->build(G); + SYSslist::append(a); + } else { + a = SYSslist::last(); + } + a[length_++ % G] = t; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +// TODO: solve this warning +// If T is of type pointer, then T result is an unintialized pointer. +// The code does not garantee that result will be assigned later a value. +// In this case, the method can return an unintialized pointer. +// When called from engine/vec.h:1003 for example, this could lead to a crash. +#endif + +template inline T SYSlarray::removeLast() { + assert(length_ > 0); + T result = (*this)[length_ - 1]; + --length_; + if (length_ % G == 0) { + T* a = SYSslist::removeAt(length_ / G); + this->destroy(a); + } + return result; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +template inline void SYSlarray::clear() { + SYSslistIterator iterator(*this); + while (++iterator) { + this->destroy(iterator.key()); + } + SYSslist::clear(); + length_ = 0; +} + +template inline SYSlarray::~SYSlarray() { + clear(); +} + +template inline uint32_t SYSlarray::length() const { + return length_; +} + +// +// SYSlvector: vector made of multiple small blocks to limit memory usage and fragmentation. +// Allows concurrent reads and appends. +// +template > class SYSlvector: public A { +private: + + SYSlarray array_; + uint32_t length_; + +protected: + + void resize(const uint32_t length); + +public: + + static const int BLOCK_SIZE = 2048; + + SYSlvector(); + + ~SYSlvector(); + + SYSlvector(const SYSlvector& right); + + SYSlvector& operator =(const SYSlvector& right); + + void clear(); + + uint32_t length() const; + + void append(const T& t); + + const T& operator[](const uint32_t index) const; + + const T& first() const; + + const T& last() const; + + T& operator[](const uint32_t index); + + void shrink(const uint32_t length); + + int64_t getSize() const; +}; + +template inline SYSlvector::SYSlvector() : + length_(0) { +} + +template inline void SYSlvector::clear() { + for (uint32_t i = 0; i < array_.length(); ++i) { + this->destroy(array_[i]); + } + array_.clear(); + this->resetCount(); + length_ = 0; +} + +template inline SYSlvector::~SYSlvector() { + clear(); +} + +template inline uint32_t SYSlvector::length() const { + return length_; +} + +template inline void SYSlvector::resize(const uint32_t length) { + const uint32_t oldN = array_.length(); + const uint32_t newN = length == 0 ? 
0 : (length + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (oldN < newN) { + for (uint32_t i = oldN; i < newN; ++i) { + array_.append(this->build(BLOCK_SIZE)); + } + } else if (oldN > newN) { + for (uint32_t i = newN; i < oldN; ++i) { + this->destroy(array_.removeLast()); + } + if (newN == 0) { + array_.clear(); + this->resetCount(); + } + } + length_ = length; +} + +template inline void SYSlvector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline void SYSlvector::append(const T& t) { + const uint32_t n = length_ / BLOCK_SIZE; + if (n == array_.length()) { + array_.append(this->build(BLOCK_SIZE)); + } + array_[n][length_++ % BLOCK_SIZE] = t; +} + +template inline const T& SYSlvector::operator[](const uint32_t index) const { + assert(index < length_); + const T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline const T& SYSlvector::first() const { + return (*this)[0]; +} + +template inline const T& SYSlvector::last() const { + return (*this)[length_ - 1]; +} + +template inline T& SYSlvector::operator[](const uint32_t index) { + assert(index < length_); + T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline SYSlvector& SYSlvector::operator =(const SYSlvector& right) { + if (this != &right) { + clear(); + for (uint32_t i = 0; i < right.length_; ++i) { + append(right[i]); + } + } + return *this; +} + +template inline SYSlvector::SYSlvector(const SYSlvector& right) : + length_(0) { + *this = right; +} + +template inline int64_t SYSlvector::getSize() const { + return length() * sizeof(A); +} + +// +// SYSbitVector: bit vector. +// Allows concurrent reads and appends. +// +template class SYSbitVector: private SYSlvector { + +private: + + uint32_t size_; + +protected: + + void resize(const uint32_t n); + +public: + + SYSbitVector(); + SYSbitVector(const SYSbitVector& right); + ~SYSbitVector(); + + // accessors + bool isEmpty() const; + uint32_t length() const; + bool operator [](const uint32_t index) const; + int64_t getSize() const; + bool areAll(bool value) const; + + // operations + void clearBit(const uint32_t offset); + void setBit(const uint32_t offset); + void clear(); + void shrink(const uint32_t length); + + // operators + SYSbitVector& operator =(const SYSbitVector& right); +}; + + +template inline bool SYSbitVector::areAll(bool value) const { + if (isEmpty()) return true; + const uint n = size_/64; + for (uint i=0; i::operator[](i); + if (*p != (value ? ULLONG_MAX : 0)) + return false; + } + const uint r = size_%64; + if (r != 0) { + const uint64_t* p = &SYSlvector::operator[](n); + for (uint i=0; i>i) & 1ULL) != value) + return false; + } + } + return true; +} +template inline bool SYSbitVector::isEmpty() const { + return size_ == 0; +} + +template inline uint32_t SYSbitVector::length() const { + return size_; +} + +template inline void SYSbitVector::clearBit(const uint32_t offset) { + if (offset < length()) { + uint64_t* p = &SYSlvector::operator[](offset >> 6); + *p &= ~(1ULL << (offset & 63ULL)); + } +} + +template inline void SYSbitVector::resize(const uint32_t n) { + uint32_t oldN = SYSlvector::length(); + SYSlvector::resize((n + 63) / 64); + const uint32_t newN = SYSlvector::length(); + + // Reset added words. + while (oldN < newN) { + SYSlvector::operator[](oldN++) = 0; + } + + // Reset added bits. 
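SYSbitVector packs bits into 64-bit words: bit `offset` lives in word `offset >> 6` under mask `1ULL << (offset & 63)`. The same arithmetic in self-contained form, over a plain std::vector<uint64_t> instead of the block-based storage used by the engine:

// Word/mask arithmetic as used by setBit()/clearBit()/operator[], shown on a
// flat word array for illustration.
#include <cstdint>
#include <vector>

struct Bits {
  std::vector<uint64_t> words;

  void set(uint32_t offset) {
    const uint32_t w = offset >> 6;            // which 64-bit word
    if (w >= words.size()) words.resize(w + 1, 0);
    words[w] |= (1ULL << (offset & 63));       // which bit inside that word
  }
  void clear(uint32_t offset) {
    const uint32_t w = offset >> 6;
    if (w < words.size()) words[w] &= ~(1ULL << (offset & 63));
  }
  bool test(uint32_t offset) const {
    const uint32_t w = offset >> 6;
    return w < words.size() && (words[w] & (1ULL << (offset & 63))) != 0;
  }
};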
+ uint32_t nbits = 63 - (size_ % 64); + while (size_ < n && nbits > 0) { + clearBit(size_++); + nbits--; + } + size_ = n; +} + +template inline void SYSbitVector::setBit(const uint32_t offset) { + if (offset >= length()) { + resize(offset + 1); + } + uint64_t* p = &SYSlvector::operator[](offset >> 6); + *p |= (1ULL << (offset & 63ULL)); +} + +template inline SYSbitVector::SYSbitVector() : size_(0) { +} + +template inline void SYSbitVector::clear() { + SYSlvector::clear(); + size_ = 0; +} + +template inline SYSbitVector& SYSbitVector::operator =(const SYSbitVector& right) { + if (this != &right) { + SYSlvector::operator =(right); + size_ = right.size_; + } + return *this; +} + +template inline SYSbitVector::SYSbitVector(const SYSbitVector& right) { + *this = right; +} + +template inline bool SYSbitVector::operator[](const uint32_t index) const { + assert(index < length()); + return SYSlvector::operator[](index / 64) & (1ULL << (index % 64)); +} + +template inline SYSbitVector::~SYSbitVector() { + clear(); +} + +template inline void SYSbitVector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline int64_t SYSbitVector::getSize() const { + return SYSlvector::getSize(); +} + +// +// SYSxvector: vector made of multiple small blocks to limit memory usage and fragmentation. +// Allows concurrent reads, but without appends. +// +template > class SYSxvector: public A { +private: + + SYSpVector array_; + uint32_t length_; + +protected: + + void resize(const uint32_t length); + +public: + + static const uint32_t BLOCK_SIZE = 2048; + + SYSxvector(); + + ~SYSxvector(); + + SYSxvector(const SYSxvector& right); + + SYSxvector(const uint32_t length); + + SYSxvector& operator =(const SYSxvector& right); + + void clear(); + + uint32_t length() const; + + void append(const T& t); + + const T& operator[](const uint32_t index) const; + + const T& first() const; + + const T& last() const; + + uint32_t index(const T& t) const; + + bool contains(const T& t) const; + + T& operator[](const uint32_t index); + + void shrink(const uint32_t length); + + int64_t getSize() const; +}; + +template inline SYSxvector::SYSxvector() : + length_(0) { +} + +template inline void SYSxvector::clear() { + for (uint32_t i = 0; i < array_.entries(); ++i) { + this->destroy(array_[i]); + } + array_.clear(); + this->resetCount(); + length_ = 0; +} + +template inline SYSxvector::~SYSxvector() { + clear(); +} + +template inline uint32_t SYSxvector::length() const { + return length_; +} + +template inline void SYSxvector::resize(const uint32_t length) { + const uint32_t oldN = array_.length(); + const uint32_t newN = length == 0 ? 
0 : (length + BLOCK_SIZE - 1) / BLOCK_SIZE; + if (oldN < newN) { + for (uint32_t i = oldN; i < newN; ++i) { + array_.append(this->build(BLOCK_SIZE)); + } + } else if (oldN > newN) { + for (uint32_t i = newN; i < oldN; ++i) { + this->destroy(array_.last()); + array_.removeLast(); + } + if (newN == 0) { + array_.clear(); + this->resetCount(); + } + } + length_ = length; +} + +template inline SYSxvector::SYSxvector(const uint32_t length) : length_(0) { + resize(length); +} + +template inline void SYSxvector::shrink(const uint32_t length) { + if (length < this->length()) { + resize(length); + } +} + +template inline void SYSxvector::append(const T& t) { + const uint32_t n = length_ / BLOCK_SIZE; + if (n == array_.length()) { + array_.append(this->build(BLOCK_SIZE)); + } + array_[n][length_++ % BLOCK_SIZE] = t; +} + +template inline const T& SYSxvector::operator[](const uint32_t index) const { + assert(index < length_); + const T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline const T& SYSxvector::first() const { + return (*this)[0]; +} + +template inline const T& SYSxvector::last() const { + return (*this)[length_ - 1]; +} + +template inline bool SYSxvector::contains(const T& t) const { + return index(t) != SYS_NPOS; +} + +template inline uint32_t SYSxvector::index(const T& t) const { + uint32_t k = 0, j = 0; + uint32_t remaining = length_; + while (k < length_) { + const T* v = array_[j++]; + uint32_t max_i = (remaining > BLOCK_SIZE ? BLOCK_SIZE : remaining); + for (uint i=0; i inline T& SYSxvector::operator[](const uint32_t index) { + assert(index < length_); + T* v = array_[index / BLOCK_SIZE]; + return v[index % BLOCK_SIZE]; +} + +template inline SYSxvector& SYSxvector::operator =(const SYSxvector& right) { + if (this != &right) { + clear(); + for (uint32_t i = 0; i < right.length_; ++i) { + append(right[i]); + } + } + return *this; +} + +template inline SYSxvector::SYSxvector(const SYSxvector& right) : length_(0) { + *this = right; +} + +template inline int64_t SYSxvector::getSize() const { + return length() * sizeof(A); +} + +} + +#endif /* #ifndef _engine_vec_h_ */ diff --git a/storage/sparrow/functions/functions.cc b/storage/sparrow/functions/functions.cc new file mode 100644 index 000000000000..d1b4ebe4cc51 --- /dev/null +++ b/storage/sparrow/functions/functions.cc @@ -0,0 +1,560 @@ +/* + InfoVista functions. +*/ + +#include "functions.h" +#include "ipaddress.h" +#include "../engine/misc.h" +#include "../engine/internalapi.h" + +#include "../engine/log.h" +#include "sql/sql_class.h" +#include "sql/current_thd.h" +#include "sql/tztime.h" + +using namespace IvFunctions; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function IPTOSTR +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_iptostr::resolve_type(THD *thd) { + if (Item_str_func::resolve_type(thd)) return true; + set_data_type_string(ulonglong(39)); // IPv6 max textual length. + set_nullable(true); // Can be NULL, e.g. 
in case of badly formed input string + return false; +} + +String* Item_func_iptostr::val_str(String* str) { + assert(fixed == 1); + Item& arg = *args[0]; + String* s = arg.val_str(str); + if (arg.is_null() || s->charset() != &my_charset_bin || (s->length() != 4 && s->length() != 16)) { + null_value = 1; + return 0; + } + IpAddress address(reinterpret_cast(s->ptr()), s->length()); + tmp_value.alloc(max_length); + tmp_value.length(address.print(const_cast(tmp_value.ptr()))); + null_value = 0; + + return &tmp_value; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function STRTOIP +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_strtoip::resolve_type(THD *thd) { + if (Item_str_func::resolve_type(thd)) return true; + set_data_type_string(ulonglong(16)); + set_nullable(true); // Can be NULL, e.g. in case of badly formed input string + return false; +} + +String* Item_func_strtoip::val_str(String* str) { + assert(fixed == 1); + Item& arg = *args[0]; + if (arg.is_null()) { + null_value = 1; + return 0; + } + String* s = arg.val_str(str); + tmp_value.alloc(max_length); + IpAddress address(reinterpret_cast(tmp_value.ptr()), max_length); + if (!address.parse(s->ptr(), s->length())) { + null_value = 1; + return 0; + } + tmp_value.length(address.isV4() ? 4 : 16); + null_value = 0; + return &tmp_value; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function MASKIP +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_maskip::resolve_type(THD *thd) { + if (Item_str_func::resolve_type(thd)) return true; + set_data_type_string(ulonglong(16)); + set_nullable(true); // Can be NULL, e.g. in case of badly formed input string + return false; +} + +String* Item_func_maskip::val_str(String* str) { + assert(fixed == 1); + Item& arg0 = *args[0]; + String* s0 = arg0.val_str(str); + if (arg0.is_null() || s0->charset() != &my_charset_bin || (s0->length() != 4 && s0->length() != 16)) { + null_value = 1; + return 0; + } + tmp_value.alloc(max_length); + tmp_value.set(*s0, 0, s0->length()); + Item& arg1 = *args[1]; + String* s1 = arg1.val_str(str); + if (arg1.is_null() || s1->charset() != &my_charset_bin || (s1->length() != 4 && s1->length() != 16)) { + null_value = 1; + return 0; + } + IpAddress address(reinterpret_cast(tmp_value.ptr()), tmp_value.length()); + if (!address.applyMask(IpAddress(reinterpret_cast(s1->ptr()), s1->length()))) { + null_value = 1; + return 0; + } + tmp_value.length(address.isV4() ? 4 : 16); + null_value = 0; + return &tmp_value; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function GETIPMASK +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_getipmask::resolve_type(THD *thd) { + if (Item_str_func::resolve_type(thd)) return true; + set_data_type_string(ulonglong(16)); + set_nullable(true); // Can be NULL, e.g. in case of badly formed input string + return false; +} + +String* Item_func_getipmask::val_str(String* str) { + Item& arg = *args[0]; + if (arg.is_null()) { + null_value = 1; + return 0; + } + longlong bits = arg.val_int(); + if (bits < 0 || bits > 128) { + null_value = 1; + return 0; + } + const uint32_t length = bits <= 32 ? 
4 : 16; + if (str->alloc(length)) { + null_value = 1; + return 0; + } + str->length(length); + IpAddress address(reinterpret_cast(str->ptr()), length); + address.makeMask(static_cast(bits)); + null_value = 0; + return str; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function ISIPV4 +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Replaced with MySQL built-in IP_ISV4() + +/*bool Item_func_isipv4::val_bool() { + Item& arg = *args[0]; + if (arg.is_null()) { + null_value = 1; + return 0; + } + String tmp; + String* s = arg.val_str(&tmp); + if (s == 0 || (s->length() != 4 && s->length() != 16) || s->charset() != &my_charset_bin) { + null_value = 1; + return 0; + } + IpAddress address(reinterpret_cast(s->ptr()), s->length()); + null_value = 0; + return address.isV4(); +}*/ + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function ISIPPRIVATE +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_isipprivate::resolve_type(THD *thd) { + max_length = 1; + set_nullable(true); // Can be NULL, e.g. in case of badly formed input string + return Item_int_func::resolve_type(thd); +} + +bool Item_func_isipprivate::val_bool() { + Item& arg = *args[0]; + if (arg.is_null()) { + null_value = 1; + return 0; + } + String tmp; + String* s = arg.val_str(&tmp); + if (s == 0 || (s->length() != 4 && s->length() != 16) || s->charset() != &my_charset_bin) { + null_value = 1; + return 0; + } + IpAddress address(reinterpret_cast(s->ptr()), s->length()); + null_value = 0; + return address.isPrivate(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Functions TADJUST and TADJUSTW +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_tadjust::resolve_type(THD *thd) { + if (Item_int_func::resolve_type(thd)) return true; + set_nullable(true); + max_length = MAX_BIGINT_WIDTH + 1; + return false; +} + +// Gets the number of periods. +int Item_func_tadjust::getN() { + if (nArg_ == -1) { + return 1; + } else { + Item& arg = *args[nArg_]; + if (arg.is_null()) { + return -1; + } else { + return static_cast(arg.val_int()); + } + } +} + +// Gets the first day of the week (0 is Monday ... 6 is Sunday). +int Item_func_tadjust::getFdow() { + if (fdowArg_ == -1) { + // Use MySQL setting: it is Sunday or Monday. + // See http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_week. + return ((current_thd->variables.default_week_format % 2) == 0) ? 
6 : 0; + } else { + Item& arg = *args[fdowArg_]; + if (arg.is_null()) { + return -1; + } else { + return static_cast(arg.val_int()); + } + } +} + +longlong Item_func_tadjust::getSeconds(MYSQL_TIME* t) { + assert(fixed == 1); + if (get_arg0_date(t, 0)) { + null_value = args[0]->null_value; + return 0; + } + if (args[0]->type() == FIELD_ITEM) { + Field* field = static_cast(args[0])->field; + if (field->type() == MYSQL_TYPE_TIMESTAMP) { + my_timeval tm; + int warning = 0; + if ( !static_cast(field)->get_timestamp(&tm, &warning) ) { + return tm.m_tv_sec; + } + } + } + bool dummy; + return static_cast(current_thd->time_zone()->TIME_to_gmt_sec(t, &dummy)); +} + +longlong Item_func_tadjust::val_int() { + assert(fixed == 1); + const int period = getN(); + const int fdow = getFdow(); + if (period <= 0 || fdow < 0) { + null_value = 1; + return 0; + } + MYSQL_TIME t; + longlong seconds = getSeconds(&t); + if (seconds == 0) { + null_value = 1; + return 0; + } + + // Do not handle microseconds. + t.second_part = 0; + + // Adjust the timestamp to the given period. + unsigned int p = static_cast(period); + unsigned int delta = 0; + switch (intervalType_) { + case INTERVAL_YEAR: + t.year -= (t.year % p); + t.month = 1; + t.day = 1; + t.hour = 0; + t.minute = 0; + t.second = 0; + break; + case INTERVAL_QUARTER: + p *= 3; + [[fallthrough]]; + case INTERVAL_MONTH: + if (p > 12 || (12 % p) != 0) { + null_value = 1; + return 0; + } + t.month -= ((t.month - 1) % p); + t.day = 1; + t.hour = 0; + t.minute = 0; + t.second = 0; + break; + case INTERVAL_WEEK: { + t.hour = 0; + t.minute = 0; + t.second = 0; + long nday = calc_daynr(t.year, t.month, t.day); + int weekDay = (nday - 2 - fdow) % 7; + if (weekDay != 0) { + get_date_from_daynr(nday - weekDay, &t.year, &t.month, &t.day); + } + break; + } + case INTERVAL_DAY: { + t.hour = 0; + t.minute = 0; + t.second = 0; + long nday = calc_daynr(t.year, t.month, t.day); + nday = nday - nday%p; + get_date_from_daynr(nday, &t.year, &t.month, &t.day); + break; + } + case INTERVAL_HOUR: + if (p >= 24 || (24 % p) != 0) { + null_value = 1; + return 0; + } + t.hour -= (t.hour % p); + t.minute = 0; + t.second = 0; + delta = p * 3600; + break; + case INTERVAL_MINUTE: + if (p >= 60 || (60 % p) != 0) { + null_value = 1; + return 0; + } + t.minute -= (t.minute % p); + t.second = 0; + delta = p * 60; + break; + case INTERVAL_SECOND: + if (p >= 60 || (60 % p) != 0) { + null_value = 1; + return 0; + } + t.second -= (t.second % p); + delta = p; + break; + default: + null_value = 1; + return 0; + } + bool dummy; + longlong adjusted = static_cast(current_thd->time_zone()->TIME_to_gmt_sec(&t, &dummy)); + if (delta > 0) { + longlong gap = seconds - adjusted; + if (gap > 0 && gap > delta) { + seconds = adjusted + delta; + } else if (gap < 0 && -gap > delta) { + seconds = adjusted - delta; + } else { + seconds = adjusted; + } + } else { + seconds = adjusted; + } + if (seconds == 0) { + null_value = 1; + return 0; + } else { + null_value = 0; + return seconds; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function TNEXT +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_tnext::resolve_type(THD *thd) { + if (Item_int_func::resolve_type(thd)) return true; + set_nullable(true); + max_length = MAX_BIGINT_WIDTH + 1; + return false; +} + +longlong Item_func_tnext::val_int() { + assert(fixed == 1); + if (args[0]->is_null() || args[1]->is_null()) { + null_value = 
1; + return 0; + } + longlong seconds = args[0]->val_int(); + if (seconds == 0) { + null_value = 1; + return 0; + } + longlong n = args[1]->val_int(); + if (n == 0) { + null_value = 0; + return seconds; + } + + // Compute the added interval. + Interval interval; + memset(&interval, 0, sizeof(interval)); + if (n < 0) { + interval.neg = true; + n = -n; + } + switch (intervalType_) { + case INTERVAL_YEAR: + interval.year = static_cast(n); + break; + case INTERVAL_QUARTER: + n *= 3; + [[fallthrough]]; + case INTERVAL_MONTH: + interval.month = static_cast(n); + break; + case INTERVAL_WEEK: + n *= 7; + [[fallthrough]]; + case INTERVAL_DAY: + interval.day = static_cast(n); + break; + case INTERVAL_HOUR: + interval.hour = static_cast(n); + break; + case INTERVAL_MINUTE: + interval.minute = static_cast(n); + break; + case INTERVAL_SECOND: + interval.second = static_cast(n); + break; + default: + null_value = 1; + return 0; + } + MYSQL_TIME t; + current_thd->time_zone()->gmt_sec_to_TIME(&t, static_cast(seconds)); + if (date_add_interval_with_warn(current_thd, &t, intervalType_, interval)) { + null_value = 1; + return 0; + } + bool dummy; + seconds = static_cast(current_thd->time_zone()->TIME_to_gmt_sec(&t, &dummy)); + null_value = 0; + return seconds; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function GETNEWEST +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_getnewest::resolve_type(THD *thd) { + if (Item_int_func::resolve_type(thd)) return true; + set_nullable(true); + max_length = MAX_BIGINT_WIDTH + 1; + return false; +} + +longlong Item_func_getnewest::val_int() { + Item& arg0 = *args[0]; + if (arg0.is_null()) { + null_value = 1; + return 0; + } + String tmp0; + String* s0 = arg0.val_str(&tmp0); + if (s0 == 0 ) { + null_value = 1; + return 0; + } + + Item& arg1 = *args[1]; + if (arg1.is_null()) { + null_value = 1; + return 0; + } + String tmp1; + String* s1 = arg1.val_str(&tmp1); + if (s1 == 0 ) { + null_value = 1; + return 0; + } + + using namespace Sparrow; + + Str databaseName( s0->c_ptr() ); + Str tableName( s1->c_ptr() ); + + uint64_t newest = 0; + try { + MasterGuard master = InternalApi::get(databaseName.c_str(), tableName.c_str(), false, false, 0); + ReadGuard masterGuard(master->getLock()); + newest = master->getNewest() / 1000; + + } catch(const SparrowException& e) { + null_value = 1; + spw_print_error("Sparrow: Cannot get timestamp of newest data for %s.%s: %s", databaseName.c_str(), tableName.c_str(), e.getText()); + return HA_ERR_INTERNAL_ERROR; + } + + return newest; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Function GETOLDEST +////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool Item_func_getoldest::resolve_type(THD *thd) { + if (Item_int_func::resolve_type(thd)) return true; + set_nullable(true); + max_length = MAX_BIGINT_WIDTH + 1; + return false; +} + + +longlong Item_func_getoldest::val_int() { + Item& arg0 = *args[0]; + if (arg0.is_null()) { + null_value = 1; + return 0; + } + String tmp0; + String* s0 = arg0.val_str(&tmp0); + if (s0 == 0 ) { + null_value = 1; + return 0; + } + + Item& arg1 = *args[1]; + if (arg1.is_null()) { + null_value = 1; + return 0; + } + String tmp1; + String* s1 = arg1.val_str(&tmp1); + if (s1 == 0 ) { + null_value = 1; + return 0; + } + + using namespace Sparrow; + + Str 
databaseName( s0->c_ptr() ); + Str tableName( s1->c_ptr() ); + + uint64_t oldest = 0; + try { + MasterGuard master = InternalApi::get(databaseName.c_str(), tableName.c_str(), false, false, 0); + ReadGuard masterGuard(master->getLock()); + oldest = master->getOldest() / 1000; + + } catch(const SparrowException& e) { + null_value = 1; + spw_print_error("Sparrow: Cannot get timestamp of oldest data for %s.%s: %s", databaseName.c_str(), tableName.c_str(), e.getText()); + return HA_ERR_INTERNAL_ERROR; + } + + return oldest; +} diff --git a/storage/sparrow/functions/functions.h b/storage/sparrow/functions/functions.h new file mode 100644 index 000000000000..a3c42e0046ac --- /dev/null +++ b/storage/sparrow/functions/functions.h @@ -0,0 +1,14 @@ +/* + InfoVista functions. +*/ + +#ifndef _functions_functions_h_ +#define _functions_functions_h_ + + +#include "sql/item.h" +#include "sql/sql_time.h" +#include "sql/item_strfunc.h" +#include "sql/item_timefunc.h" + +#endif /* #ifndef _functions_functions_h_ */ diff --git a/storage/sparrow/functions/ipaddress.cc b/storage/sparrow/functions/ipaddress.cc new file mode 100644 index 000000000000..d4eabeb5ae1e --- /dev/null +++ b/storage/sparrow/functions/ipaddress.cc @@ -0,0 +1,372 @@ +/* + IP address. +*/ + +#include "ipaddress.h" + +#include +#include +#include +#include + +namespace IvFunctions { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IpAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// Prints an IP address. Returns the length of the output string, or 0 in case of error. +uint32_t IpAddress::print(char* buffer) const { + if (!isValid()) { + return 0; + } + if (isV4()) { + const uint8_t* bytes = (length_ == 4 ? bytes_ : bytes_ + 12); + return sprintf(buffer, "%u.%u.%u.%u", bytes[0], bytes[1], bytes[2], bytes[3]); + } else { + bool ok = false; + + // Check for IPv6-compatible, IPv4-mapped, and IPv4-translated addresses. + if (bytes_[0] == 0 && bytes_[1] == 0 && bytes_[2] == 0 && bytes_[3] == 0 + && bytes_[4] == 0 && bytes_[5] == 0 && bytes_[6] == 0 && bytes_[7] == 0 + && (bytes_[12] != 0 || bytes_[13] != 0)) { + if (bytes_[8] == 0 && bytes_[9] == 0 + && ((bytes_[10] == 0 && bytes_[11] == 0) || (bytes_[10] == 0xff && bytes_[11] == 0xff))) { + // Compatible or mapped. + sprintf(buffer, "::%s%u.%u.%u.%u", bytes_[10] == 0 ? "" : "ffff:", + bytes_[12], bytes_[13], bytes_[14], bytes_[15]); + ok = true; + } + else if (bytes_[8] == 0xff && bytes_[9] == 0xff && bytes_[10] == 0 && bytes_[11] == 0) { + // Compatible or mapped. + sprintf(buffer, "::ffff:0:%u.%u.%u.%u", bytes_[12], bytes_[13], bytes_[14], bytes_[15]); + ok = true; + } + } + if (!ok) { + int maxFirst = 0; + int maxLast = 0; + int curFirst = 0; + int curLast = 0; + for (int i = 0; i < 8; ++i) { + if (bytes_[i * 2] == 0 && bytes_[i * 2 + 1] == 0) { + // Extend current substring. + curLast = i + 1; + + // Check if current is now largest. + if (curLast - curFirst > maxLast - maxFirst) { + maxFirst = curFirst; + maxLast = curLast; + } + } else { + // Start a new substring. + curFirst = i + 1; + curLast = i + 1; + } + } + + // Ignore a substring of length 1. + if (maxLast - maxFirst <= 1) { + maxFirst = maxLast = 0; + } + + // Write colon-separated words. + // A double-colon takes the place of the longest string of zeroes. + // All zeroes is just "::". + char* tmpBuffer = buffer; + for (int i = 0; i < 8; ++i) { + // Skip over string of zeroes. 
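When printing a general IPv6 address, the code first scans the eight 16-bit groups for the longest run of zero groups, so that run (if longer than one group) can be collapsed to "::". That scan on its own, as a standalone sketch with illustrative names:

// Find the longest run of zero groups in an 8-group IPv6 address, mirroring
// the scan in IpAddress::print(). Runs of length 1 are left alone.
#include <cstdint>

struct ZeroRun { int first; int last; };        // half-open [first, last)

ZeroRun longestZeroRun(const uint16_t groups[8]) {
  ZeroRun best{0, 0}, cur{0, 0};
  for (int i = 0; i < 8; ++i) {
    if (groups[i] == 0) {
      cur.last = i + 1;                          // extend the current run
      if (cur.last - cur.first > best.last - best.first) best = cur;
    } else {
      cur.first = cur.last = i + 1;              // start a new (empty) run
    }
  }
  if (best.last - best.first <= 1) best.first = best.last = 0;  // too short to collapse
  return best;
}

// 2001:0:0:0:0:0:0:1 -> run [1, 7), printed as "2001::1".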
+ if (maxFirst <= i && i < maxLast) { + tmpBuffer += sprintf(tmpBuffer, "::"); + i = maxLast - 1; + continue; + } + + // Need colon separator if not at beginning. + if (i != 0 && i != maxLast) { + *tmpBuffer++ = ':'; + } + tmpBuffer += sprintf(tmpBuffer, "%x", (bytes_[i * 2] << 8) | bytes_[i * 2 + 1]); + } + } + return static_cast(strlen(buffer)); + } +} + +// Parses an IP address. Returns true if OK. +bool IpAddress::parse(const char* buffer, uint32_t length) { + assert(length_ == 16); + + // Consider the input string is not null-terminated. + // Try IPv4 first. + if (length >= 7 && length <= 15) { + int n = 0; + int byte = 0; + bool ok = true; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isdigit(c)) { + n = n * 10 + static_cast(c - '0'); + if (n > 255) { + ok = false; + break; + } + } + if (c == '.' || i + 1 == length) { + if (byte == 4) { + ok = false; + break; + } + bytes_[byte++] = n; + n = 0; + } else if (!isdigit(c)) { + ok = false; + break; + } + } + if (ok && byte == 4) { + length_ = 4; + return true; + } + } + + // Not IPv4: try IPv6. + enum + { + start, + inNumber, + afterDoubleColon + } state = start; + + bool result = true; + int number = -1; + bool sawHex = false; + int numColons = 0, numDots = 0; + int sawDoubleColon = 0; + int i = 0; + uint32_t l = 0; + while (l < length) { + char c = buffer[l]; + switch (state) { + case start: + if (c == ':') { + // This case only handles double-colon at the beginning. + if (numDots > 0 || numColons > 0 || buffer[1] != ':') { + goto finish; + } + sawDoubleColon = 1; + numColons = 2; + bytes_[i * 2] = 0; // Pretend it was 0:: + bytes_[i * 2 + 1] = 0; + i++; + l++; + state = afterDoubleColon; + break; + } + [[fallthrough]]; + case afterDoubleColon: + if (isdigit(c)) { + sawHex = false; + number = l; + state = inNumber; + } else if (isxdigit(c)) { + if (numDots > 0) { + goto finish; + } + sawHex = true; + number = l; + state = inNumber; + } else { + goto finish; + } + break; + case inNumber: + if (isdigit(c)) { + // Remain in InNumber state. + } else if (isxdigit(c)) { + if (numDots > 0) { + goto finish; + } + sawHex = true; + // Remain in InNumber state. + } + else if (c == ':') { + if (numDots > 0) { + goto finish; + } + if (numColons > 6) { + goto finish; + } + if (buffer[l + 1] == ':') { + if (sawDoubleColon || numColons > 5) { + goto finish; + } + sawDoubleColon = numColons + 1; + numColons += 2; + l++; + state = afterDoubleColon; + } else { + numColons++; + state = start; + } + } + else if (c == '.') { + if (sawHex || numDots > 2 || numColons > 6) { + goto finish; + } + numDots++; + state = start; + } else { + goto finish; + } + break; + } + // If we finished a number, parse it. + if (state != inNumber && number != -1) { + // Note either numDots > 0 or numColons > 0, + // because something terminated the number. + if (numDots == 0) { + int n = parseHex(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[i * 2] = (n >> 8) & 0xff; + bytes_[i * 2 + 1] = n & 0xff; + i++; + } else { + int n = parseInt(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[2 * i + numDots-1] = static_cast(n); + } + } + l++; + } + +finish: + + // Check that we have a complete address. + if (numDots == 0) { + } else if (numDots == 3) { + numColons++; + } else { + result = false; + } + if (result) { + if (sawDoubleColon) { + } else if (numColons == 7) { + } else { + result = false; + } + if (result) { + // Parse the last number, if necessary. 
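Going the other way, a textual IPv6 address may contain at most one "::", and parse() expands it by sliding the groups read after the "::" to the end of the 16-byte buffer and zero-filling the gap (the memmove/memset at the end of the function). The same expansion expressed on explicit group lists, as a sketch under the assumption that the groups on each side of the "::" have already been parsed:

// Expand the "::" of an IPv6 address: groups seen before and after the double
// colon are kept, and the elided groups in between become zero.
#include <cstddef>
#include <cstdint>
#include <vector>

// Returns false if the address would not fit in 8 groups.
bool expandDoubleColon(const std::vector<uint16_t>& before,
                       const std::vector<uint16_t>& after,
                       uint16_t out[8]) {
  if (before.size() + after.size() > 7) return false;  // "::" must stand for at least one group
  std::size_t i = 0;
  for (uint16_t g : before) out[i++] = g;
  while (i < 8 - after.size()) out[i++] = 0;            // zero-fill the elided middle
  for (uint16_t g : after) out[i++] = g;
  return true;
}

// "2001:db8::1" -> before = {0x2001, 0xdb8}, after = {1}
//               -> 2001:0db8:0000:0000:0000:0000:0000:0001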
+ if (state == inNumber) { + if (numDots == 0) { + int n = parseHex(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[i * 2] = (n >> 8) & 0xff; + bytes_[i * 2 + 1] = n & 0xff; + } else { + int n = parseInt(buffer + number, length - number); + if (n == -1) { + return false; + } + bytes_[2 * i + numDots] = static_cast(n); + } + } else if (state == afterDoubleColon) { + bytes_[i * 2] = 0; // pretend it was ::0 + bytes_[i * 2 + 1] = 0; + } else { + result = false; + } + + // Insert zeroes for the double-colon, if necessary. + if (result && sawDoubleColon) { + memmove(&bytes_[(sawDoubleColon + 8 - numColons) * 2], + &bytes_[sawDoubleColon * 2], (numColons - sawDoubleColon) * 2); + memset(&bytes_[sawDoubleColon * 2], 0, (8 - numColons) * 2); + } + } + } + return result; +} + +// Helper methods to parse decimal (0..255) and hex numbers (0..ffff). +// They return -1 in case of error. +// STATIC +int IpAddress::parseInt(const char* buffer, uint32_t length) { + int n = 0; + bool hasDigit = false; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isdigit(c)) { + hasDigit = true; + n = n * 10 + static_cast(c - '0'); + } else { + if (isspace(c)) { + if (hasDigit) { + break; + } + } else { + break; + } + } + } + return (hasDigit && n <= 255) ? n : -1; +} + +// STATIC +int IpAddress::parseHex(const char* buffer, uint32_t length) { + int n = 0; + bool hasDigit = false; + for (uint32_t i = 0; i < length; ++i) { + char c = buffer[i]; + if (isxdigit(c)) { + hasDigit = true; + int hex = c >= 'a' && c <= 'f' ? c - 'a' + 10 : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - '0'; + n = n * 16 + hex; + } else { + if (isspace(c)) { + if (hasDigit) { + break; + } + } else { + break; + } + } + } + return (hasDigit && n <= 65535) ? n : -1; +} + +bool IpAddress::applyMask(const IpAddress& mask) { + if (isV4()) { + if (!mask.isV4()) { + return false; + } + uint8_t* bytes = (length_ == 4 ? bytes_ : bytes_ + 12); + const uint8_t* mbytes = (mask.length_ == 4 ? mask.bytes_ : mask.bytes_ + 12); + for (uint32_t i = 0; i < 4; ++i) { + bytes[i] &= mbytes[i]; + } + return true; + } else { + for (uint32_t i = 0; i < mask.length_; ++i) { + bytes_[i] &= mask.bytes_[i]; + } + return true; + } +} + +void IpAddress::makeMask(int bits) { + memset(bytes_, 0, length_); + int i = 0; + while (bits > 0) { + bytes_[i++] = bits >= 8 ? 0xff : (((1 << bits) - 1) << (8 - bits)); + bits -= 8; + } +} + +} diff --git a/storage/sparrow/functions/ipaddress.h b/storage/sparrow/functions/ipaddress.h new file mode 100644 index 000000000000..4bb92ea4eb05 --- /dev/null +++ b/storage/sparrow/functions/ipaddress.h @@ -0,0 +1,74 @@ +/* + IP address. 
+*/ + +#ifndef _functions_ipaddress_h_ +#define _functions_ipaddress_h_ + + +#include +#include + +namespace IvFunctions { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// IpAddress +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class IpAddress { +private: + + uint8_t* bytes_; + uint32_t length_; + +private: + + static int parseInt(const char* buffer, uint32_t length); + static int parseHex(const char* buffer, uint32_t length); + +public: + + IpAddress() : bytes_(0), length_(0) { + } + + IpAddress(const uint8_t* bytes, uint32_t length) : bytes_(const_cast(bytes)), length_(length) { + } + + bool isValid() const { + return bytes_ != 0 && (length_ == 4 || length_ == 16); + } + + bool isV4() const { + if (length_ == 4) { + return true; + } else { + for (int i = 0; i < 12; ++i) { + if (bytes_[i] != 0) { + return false; + } + } + return true; + } + } + + bool isPrivate() const { + if (isV4()) { + const uint32_t check = length_ == 4 ? ((bytes_[0] << 8) | bytes_[1]) : ((bytes_[12] << 8) | bytes_[13]); + return (check & 0xff00) == 0xa00 || check == 0xc0a8 || (check >> 4) == 0xac1; + } else { + return bytes_[0] == static_cast(0xfd); + } + } + + uint32_t print(char* buffer) const; + + bool parse(const char* buffer, uint32_t length); + + bool applyMask(const IpAddress& mask); + + void makeMask(int bits); +}; + +} + +#endif /* #ifndef _functions_ipaddress_h_ */ diff --git a/storage/sparrow/handler/field.cc b/storage/sparrow/handler/field.cc new file mode 100644 index 000000000000..ee72c0b42107 --- /dev/null +++ b/storage/sparrow/handler/field.cc @@ -0,0 +1,749 @@ +/* + Fields mapping MySQL fields. +*/ + +#include "field.h" +#include "../engine/transient.h" +#include "../engine/fileutil.h" +//#include +//#include + +#include "sql/current_thd.h" +#include "sql/sql_class.h" +#include "sql/tztime.h" + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldBase +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// STATIC +void FieldBase::createFields(const uint32_t serial, const bool coalescing, Field** myFields, const Columns& columns, + TableFields& fields, const ColumnIds* skippedColumnIds /* = NULL */) { + fields.clearAndDestroy(); + const uint32_t nbColumns = columns.length(); + fields.resize(nbColumns); + uint32_t j = 0, k = 0; + for (uint32_t i = 0; i < nbColumns; ++i) { + const Column& column = columns[i]; + const ColumnType type = column.getType(); + const bool isUnsigned = column.isFlagSet(COL_UNSIGNED); + FieldType fieldType = FieldBase::getFieldType(serial, coalescing, column); + bool forceNull = false; + if (skippedColumnIds != NULL && (fieldType == FIELD_NORMAL || fieldType == FIELD_SKIP)) { + // If this column was optimized out of this partition because all its values were NULL, + for (uint l=0; llength(); ++l) { + if ((*skippedColumnIds)[l] == k) { + if (fieldType == FIELD_NORMAL) { + // If the column is expected to exist in the partition, switch to FIELD_DEFAULT instead and force the value to NULL. + fieldType = FIELD_DEFAULT; + forceNull = true; + } else if (fieldType == FIELD_SKIP) { + // If the column has been dropped, switch to FIELD_NONE + fieldType = FIELD_NONE; + } + break; + } + } + } + FieldBase* field = 0; + switch (fieldType) { + case FIELD_NONE: + break; + case FIELD_NORMAL: { + Field* myField = myFields == 0 ? 
0 : myFields[j++]; + switch (type) { + case COL_BYTE: { + if (isUnsigned) { + field = new FieldSimple(myField, column); + } else { + field = new FieldSimple(myField, column); + } + break; + } + case COL_SHORT: { + if (isUnsigned) { + field = new FieldSimple(myField, column); + } else { + field = new FieldSimple(myField, column); + } + break; + } + case COL_INT: { + if (isUnsigned) { + field = new FieldSimple(myField, column); + } else { + field = new FieldSimple(myField, column); + } + break; + } + case COL_DOUBLE: + field = new FieldSimple(myField, column); + break; + case COL_TIMESTAMP: + field = new FieldTimestamp(myField, column); + break; + case COL_LONG: { + if (isUnsigned) { + field = new FieldSimple(myField, column); + } else { + field = new FieldSimple(myField, column); + } + break; + } + case COL_STRING: + field = new FieldString(myField, column); + break; + case COL_BLOB: + field = new FieldString(myField, column); + break; + default: assert(0); break; + } + break; + } + case FIELD_DEFAULT: { + Field* myField = myFields == 0 ? 0 : myFields[j++]; + switch (type) { + case COL_BYTE: + if (isUnsigned) { + field = new FieldDefault(myField, column, forceNull); + } else { + field = new FieldDefault(myField, column, forceNull); + } + break; + case COL_SHORT: + if (isUnsigned) { + field = new FieldDefault(myField, column, forceNull); + } else { + field = new FieldDefault(myField, column, forceNull); + } + break; + case COL_INT: + if (isUnsigned) { + field = new FieldDefault(myField, column, forceNull); + } else { + field = new FieldDefault(myField, column, forceNull); + } + break; + case COL_DOUBLE: + field = new FieldDefault(myField, column, forceNull); + break; + case COL_TIMESTAMP: + case COL_LONG: + if (isUnsigned) { + field = new FieldDefault(myField, column, forceNull); + } else { + field = new FieldDefault(myField, column, forceNull); + } + break; + case COL_STRING: + case COL_BLOB: + field = new FieldDefaultString(myField, column, forceNull); + break; + default: assert(0); break; + } + break; + } + case FIELD_SKIP: { + switch (column.getType()) { + case COL_BYTE: + field = new FieldSkip<1>(column); + break; + case COL_SHORT: + field = new FieldSkip<2>(column); + break; + case COL_INT: + field = new FieldSkip<4>(column); + break; + case COL_DOUBLE: + case COL_TIMESTAMP: + case COL_LONG: + field = new FieldSkip<8>(column); + break; + case COL_STRING: + case COL_BLOB: + field = new FieldSkip<16>(column); + break; + default: assert(0); break; + } + break; + } + default: assert(0); break; + } + fields.append(field); + if (FieldBase::exists(serial, column)) { + k++; + } + } +} + +FieldBase::FieldBase(Field* myField, const Column& column) + : column_(column) +#ifndef NDEBUG + , onlyKeyFormat_(myField == 0) +#endif +{ + if (myField == 0) { + nullOffset_ = 0; + nullBit_ = 0; + offset_ = 0; + } else { + uchar* tableBuffer = myField->table->record[0]; + nullOffset_ = myField->is_nullable() ? myField->null_offset() : UINT_MAX; + nullBit_ = myField->null_bit; + offset_ = myField->offset(tableBuffer); + } +} + +// STATIC +FieldType FieldBase::getFieldType(const uint32_t serial, const bool coalescing, const Column& column) { + const uint32_t dropSerial = column.getDropSerial(); + if (serial >= column.getSerial()) { + if (dropSerial == 0) { + // This is an existing column for which we have data. + return FIELD_NORMAL; + } else { + if (serial >= dropSerial) { + // This is a deleted column for which we have no data. 
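+ // (the partition was created at or after the drop, so it never stored this column).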
+ return FIELD_NONE; + } else { + // This is a deleted column for which we have data. + return coalescing ? FIELD_NORMAL : FIELD_SKIP; + } + } + } else { + if (dropSerial == 0) { + // This is a new column for which we have no data. + return coalescing ? FIELD_NONE : FIELD_DEFAULT; + } else { + // This is a deleted column for which we have no data. + return FIELD_NONE; + } + } +} + +// STATIC +bool FieldBase::exists(const uint32_t serial, const Column& column) { + const uint32_t dropSerial = column.getDropSerial(); + if (serial >= column.getSerial()) { + // This column was created before this partition and it was not dropped or it was dropped after this partition was created, + // therefore the column exists in this partition. + return (dropSerial == 0 || serial < dropSerial); + } else { + // This column was created after this partition, therefore this column does not exist in this partition + return false; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void FieldString::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + const bool isNull = isNullable() && (bits & 1) != 0; + if (isNull) { + reader.advance(16); + if (keyFormat) { + *buffer = 1; + } else { + buffer[nullOffset_] |= nullBit_; + } + } else { + const uint32_t l = isNullable() ? (bits >> 1) : bits; + if (l == 0) { + // Long string. + uint64_t position; + uint64_t length; + reader >> position >> length; + if (length == 0) { + store(buffer, keyFormat, reader.getData(), 0); + } else if ( length > 1000000 ) { + throw SparrowException::create(false, "internal error: string field length outrageously too big"); + } else { + stringReader.seekBin(position); + if (stringReader.position() + length <= stringReader.limit()) { + // The string is entirely contained in the buffer. + store(buffer, keyFormat, stringReader.getCurrentData(), static_cast(length)); + } else { + // Need to read it across several buffers; use a temporary buffer. + ByteBuffer stringBuffer(static_cast(IOContext::getTempBuffer1(length)), length); + stringReader >> stringBuffer; + store(buffer, keyFormat, stringBuffer.getData(), static_cast(length)); + } + } + } else { + // Small string. 
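+ // Each string column occupies a fixed 16-byte slot in the data file. Values short enough
+ // to fit are stored inline with their length carried in the row bits; longer values are
+ // read through the separate string reader, and the slot instead holds their 64-bit
+ // position and length (the l == 0 branch above).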
+ uint8_t v[16]; + ByteBuffer b(v, static_cast(sizeof(v))); + reader >> b; + store(buffer, keyFormat, v, l); + } + } +} + +void FieldString::readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + const BinString& string = *(const BinString*)data; + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + store(buffer, keyFormat, string.getData(), string.getLength()); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + store(buffer, keyFormat, string.getData(), string.getLength()); + } + } +} + +void FieldString::insertTransform(const uint8_t* buffer, ByteBuffer& output) const { + assert(!onlyKeyFormat_); + if (getColumn().isFlagSet(COL_IP_LOOKUP)) { + return; + } + const bool nullable = isNullable(); + if (nullable && (buffer[nullOffset_] & nullBit_)) { + output << static_cast(1); + } else { + if (nullable) { + output << static_cast(0); + } + uint16_t length; + const uint8_t* data = buffer + offset_; + if (lengthBytes_ == 1) { + length = *data++; + } else { + FAST_LOAD2_L(data, length); + data += 2; + } + output << static_cast(length); + output << ByteBuffer(data, length); + } +} + +void FieldString::store(uint8_t* buffer, const bool keyFormat, const uint8_t* data, const uint16_t length) const { + assert(keyFormat || !onlyKeyFormat_); + uint8_t* p; + uint32_t lengthBytes; + if (keyFormat) { + if (isNullable()) { + *buffer++ = 0; + } + p = buffer; + lengthBytes = 2; + } else { + p = buffer + offset_; + lengthBytes = lengthBytes_; + } + int wellFormedError; + const uint16_t wlength = (cs_ == &my_charset_bin || fieldLength_ == 0 || length <= fieldLength_) ? length + : static_cast(cs_->cset->well_formed_len(cs_, reinterpret_cast(data), + reinterpret_cast(data) + fieldLength_, UINT_MAX, &wellFormedError)); + if (lengthBytes == 1) { + *p++ = static_cast(wlength); + } else { + FAST_STORE2_L(p, wlength); + p += 2; + } + memcpy(p, data, wlength); +} + +// Important note: +// Both strings must be in key format, where the length is stored on exactly two bytes. +int FieldString::compare(const uint8_t* left, const uint8_t* right) const { + uint16_t leftLength; + FAST_LOAD2_L(left, leftLength); + left += 2; + uint16_t rightLength; + FAST_LOAD2_L(right, rightLength); + right += 2; + return cs_->coll->strnncollsp(cs_, left, leftLength, right, rightLength); +} + +void FieldString::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { + const bool isNull = isNullable() && (bits & 1) != 0; + if (isNull) { + reader.advance(16); + buffer << static_cast(0) << static_cast(0); + } else { + const uint32_t l = isNullable() ? (bits >> 1) : bits; + if (l == 0) { + // Long string. + uint64_t position; + uint64_t length; + reader >> position >> length; + if (length > 0 && binBuffer != 0) { + stringReader.seekBin(position); + BinString* s; + if (stringReader.position() + length <= stringReader.limit()) { + // The string is entirely contained in the buffer. + s = binBuffer->insert(stringReader.getCurrentData(), static_cast(length)); + } else { + // Need to read it across several buffers; use a temporary buffer. + ByteBuffer stringBuffer(static_cast(IOContext::getTempBuffer1(length)), length); + stringReader >> stringBuffer; + s = binBuffer->insert(stringBuffer.getData(), static_cast(length)); + } + position = s->getPosition(binBuffer->getData()); + } + buffer << (length == 0 ? 
static_cast(0) : position) << length; + } else { + // Small string. + uint8_t v[16]; + ByteBuffer b(v, static_cast(sizeof(v))); + reader >> b; + buffer << ByteBuffer(v, static_cast(sizeof(v))); + } + } +} + +int FieldString::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + const bool nullable = isNullable(); + const bool isNull1 = nullable && (bits1 & 1) != 0; + const bool isNull2 = nullable && (bits2 & 1) != 0; + if (isNull1) { + if (isNull2) { + return 0; + } else { + return -1; + } + } else if (isNull2) { + return 1; + } + uint64_t length1 = nullable ? (bits1 >> 1) : bits1; + uint8_t* s1; + uint8_t v1[16]; + if (length1 == 0) { + uint64_t pos1; + buffer1 >> pos1 >> length1; + if (binBuffer == 0) { + if (length1 > 0) { + stringReader1.seekBin(pos1); + if (stringReader1.position() + length1 <= stringReader1.limit()) { + // The string is entirely contained in the buffer. + s1 = stringReader1.getCurrentData(); + } else { + // Need to read it across several buffers; use a temporary buffer. + ByteBuffer stringBuffer(static_cast(IOContext::getTempBuffer1(length1)), length1); + stringReader1 >> stringBuffer; + s1 = stringBuffer.getData(); + } + } else { + s1 = v1; + } + } else { + s1 = const_cast(binBuffer->getData() + pos1); + } + } else { + ByteBuffer b(v1, 16); + buffer1 >> b; + s1 = v1; + } + int wellFormedError; + const uint16_t wlength1 = (cs_ == &my_charset_bin || fieldLength_ == 0 || length1 <= fieldLength_) ? static_cast(length1) + : static_cast(cs_->cset->well_formed_len(cs_, reinterpret_cast(s1), + reinterpret_cast(s1) + fieldLength_, UINT_MAX, &wellFormedError)); + uint64_t length2 = nullable ? (bits2 >> 1) : bits2; + uint8_t* s2; + uint8_t v2[16]; + if (length2 == 0) { + uint64_t pos2; + buffer2 >> pos2 >> length2; + if (binBuffer == 0) { + if (length2 > 0) { + stringReader2.seekBin(pos2); + if (stringReader2.position() + length2 <= stringReader2.limit()) { + // The string is entirely contained in the buffer. + s2 = stringReader2.getCurrentData(); + } else { + // Need to read it across several buffers; use a temporary buffer. + ByteBuffer stringBuffer(static_cast(IOContext::getTempBuffer2(length2)), length2); + stringReader2 >> stringBuffer; + s2 = stringBuffer.getData(); + } + } else { + s2 = v2; + } + } else { + s2 = const_cast(binBuffer->getData() + pos2); + } + } else { + ByteBuffer b(v2, 16); + buffer2 >> b; + s2 = v2; + } + const uint16_t wlength2 = (cs_ == &my_charset_bin || fieldLength_ == 0 || length2 <= fieldLength_) ? 
static_cast(length2) + : static_cast(cs_->cset->well_formed_len(cs_, reinterpret_cast(s2), + reinterpret_cast(s2) + fieldLength_, UINT_MAX, &wellFormedError)); + return cs_->coll->strnncollsp(cs_, s1, wlength1, s2, wlength2); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldTimestamp +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void FieldTimestamp::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + const bool isNull = (bits == 1); + uint64_t v; + if (isNull) { + reader.advance(sizeof(v)); + } else { + reader >> v; + } + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + writeMySql( v, buffer ); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + writeMySql( v, buffer + offset_ ); + } + } +} + +void FieldTimestamp::readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + writeMySql( data, buffer ); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + writeMySql( data, buffer + offset_ ); + } + } +} + +bool FieldTimestamp::readMySqlTransient(const uint8_t* buffer, uint64_t& data) const { + assert(!onlyKeyFormat_); + if (isNullable() && (buffer[nullOffset_] & nullBit_)) { + return true; + } else { + data = readMySql( buffer + offset_ ); + return false; + } +} + +bool FieldTimestamp::readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const { + assert(!onlyKeyFormat_); + if (isNullable() && (buffer[nullOffset_] & nullBit_)) { + output << static_cast(0); + return true; + } else { + uint64_t data = readMySql( buffer + offset_ ); + output << data; + return false; + } +} + +void FieldTimestamp::insertTransform(const uint8_t* buffer, ByteBuffer& output) const { + assert(!onlyKeyFormat_); + const bool nullable = isNullable(); + if (nullable && (buffer[nullOffset_] & nullBit_)) { + output << static_cast(1); + } else { + if (nullable) { + output << static_cast(0); + } + uint64_t data = readMySql( buffer + offset_ ); + output << data; + } +} + +int FieldTimestamp::compare(const uint8_t* left, const uint8_t* right) const { + uint64_t l = readMySql( left ); + uint64_t r = readMySql( right ); + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +uint64_t FieldTimestamp::readMySql( const uint8_t* buffer, uint32_t offset ) const { + uint32_t t = 0, tf = 0; + if ( dec_ < 0 ) { + FAST_LOAD4_L(buffer + offset, t); + } else { + FAST_LOAD4_B(buffer + offset, t); + switch (dec_) + { + case 1: + case 2: tf = buffer[offset+4]; tf *= 10; break; + case 3: + case 4: FAST_LOAD2_B(buffer + offset + 4, tf); tf /= 10; break; + case 5: + case 6: FAST_LOAD3_B(buffer + offset + 4, tf); tf /= 1000; break; + } + } + uint64_t data = static_cast(t) * 1000 + tf; + return data; +} + +void FieldTimestamp::writeMySql( uint64_t data, uint8_t* buffer, uint32_t offset ) const { + const uint32_t t = static_cast(data / 1000); // Timestamp in seconds + uint32_t tf = static_cast(data % 1000); // Fractional part of the timestamp in ms. 
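+ // Sparrow stores timestamps as a single 64-bit millisecond value; the MySQL record layout
+ // depends on the column type. MYSQL_TYPE_TIMESTAMP (dec_ == -1) holds 4 little-endian bytes
+ // of seconds, whereas MYSQL_TYPE_TIMESTAMP2 holds 4 big-endian bytes of seconds followed by
+ // 1 to 3 big-endian bytes of fractional seconds scaled to the declared precision. For
+ // example, with dec_ == 3 a remainder of 123 ms is written as 1230 (units of 100 us) in two
+ // bytes, and readMySql() divides it back by 10 to recover milliseconds.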
+ uint8_t* p = buffer + offset; + if ( dec_ < 0 ) { + FAST_STORE4_L(p, t); + } else { + FAST_STORE4_B(p, t); + p += 4; + switch (dec_) + { + case 0: break; + case 1: + case 2: tf /= 10; *p = static_cast(tf); break; + case 3: + case 4: tf *= 10; FAST_STORE2_B(p, tf); break; + case 5: + case 6: tf *= 1000; FAST_STORE3_B(p, tf); break; + } + } +} + +void FieldTimestamp::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { + uint64_t v; + reader >> v; + buffer << v; +} + +int FieldTimestamp::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + uint64_t l; + uint64_t r; + buffer1 >> l; + buffer2 >> r; + const bool isNull1 = (bits1 == 1); + const bool isNull2 = (bits2 == 1); + if (isNull1) { + if (isNull2) { + return 0; + } else { + return -1; + } + } else if (isNull2) { + return 1; + } + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldDefault +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template void FieldDefault::computeBinDefaultValue(Field* myField) { + const bool isNullable = column_.isFlagSet(COL_NULLABLE); + CHARSET_INFO* cs = myField == 0 ? get_charset_by_name(column_.getCharset().c_str(), MYF(MY_WME)) : const_cast(myField->charset()); + const Str& defaultValue = column_.getDefaultValue(); + const uint32_t length = defaultValue.length(); + if (length == 0 && isNullable) { + // This is a NULL. + value_ = BinValue(); + } else { + const char* s = defaultValue.c_str(); + int error = 0; + const char* end; + const int isUnsigned = column_.isFlagSet(COL_UNSIGNED) ? 1 : 0; + switch (column_.getType()) { + case COL_BYTE: { + value_ = BinValue(1); + const uint64_t x = length == 0 ? 0 : cs->cset->strntoull10rnd(cs, s, length, isUnsigned, &end, &error); + *value_.data() = static_cast(x & 0xff); + break; + } + case COL_SHORT: { + value_ = BinValue(2); + const uint64_t x = length == 0 ? 0 : cs->cset->strntoull10rnd(cs, s, length, isUnsigned, &end, &error); + const uint16_t v = static_cast(x & 0xffff); + uint8_t* d = value_.data(); + FAST_STORE2_L(d, v); + break; + } + case COL_INT: { + value_ = BinValue(4); + const uint64_t x = length == 0 ? 0 : cs->cset->strntoull10rnd(cs, s, length, isUnsigned, &end, &error); + const uint32_t v = static_cast(x & 0xffffffff); + uint8_t* d = value_.data(); + FAST_STORE4_L(d, v); + break; + } + case COL_DOUBLE: { + value_ = BinValue(8); + const double x = length == 0 ? 0 : cs->cset->strntod(cs, const_cast(s), length, &end, &error); + const uint64_t v = *reinterpret_cast(&x); + uint8_t* d = value_.data(); + FAST_STORE8_L(d, v); + break; + } + case COL_TIMESTAMP: { + value_ = BinValue(4); + MYSQL_TIME t; + MYSQL_TIME_STATUS status; +#ifndef NDEBUG + const bool result = +#endif + str_to_datetime(s, length, &t, TIME_DATETIME_ONLY, &status); + assert(result == true); + error = status.warnings; + THD* thd = current_thd; + Time_zone* tz = thd == 0 ? my_tz_SYSTEM : thd->variables.time_zone; + bool dummy; + my_time_t x = length == 0 ? 0 : tz->TIME_to_gmt_sec(&t, &dummy); + const uint32_t v = static_cast(x & 0xffffffff); + uint8_t* d = value_.data(); + FAST_STORE4_L(d, v); + break; + } + case COL_LONG: { + value_ = BinValue(8); + const uint64_t x = length == 0 ? 
0 : cs->cset->strntoull10rnd(cs, s, length, isUnsigned, &end, &error); + uint8_t* d = value_.data(); + FAST_STORE8_L(d, x); + break; + } + default: break; + } + assert(error == 0); + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldDefaultString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +void FieldDefaultString::computeBinDefaultValue(Field* myField) { + const bool isNullable = column_.isFlagSet(COL_NULLABLE); + const Str& defaultValue = column_.getDefaultValue(); + const uint32_t length = defaultValue.length(); + if (length == 0 && isNullable) { + // This is a NULL. + value_ = BinValue(); + } else { + const char* s = defaultValue.c_str(); + value_ = BinValue(2 + length); + uint8_t* p = value_.data(); + FAST_STORE2_L(p, length); + memcpy(p + 2, s, length); + } +} + +} diff --git a/storage/sparrow/handler/field.h b/storage/sparrow/handler/field.h new file mode 100644 index 000000000000..61503a4dd5ab --- /dev/null +++ b/storage/sparrow/handler/field.h @@ -0,0 +1,901 @@ +/* + Fields mapping MySQL fields. +*/ + +#ifndef _handler_field_h_ +#define _handler_field_h_ + +#include "sql/field.h" +#include "../engine/fileutil.h" +#include "../engine/types.h" +#include "../engine/binbuffer.h" + + +// These warnings are triggered by the way we serialize / deserialize values to/from the +// data partition. This is work-as-designed, so these warnings should be ignored. +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wcast-qual" +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldBase +////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* Fields are used to read data from Sparrow partitions (persistent and transient), and offer some + helper functions: compare field values, format a value to send back to MySQL. They mimic the + MySQL Field class. FieldBase is the base class, the interface. Each Sparrow type has its own + implementation of the FieldBase interface. +*/ +class FieldBase; +typedef SYSpVector TableFields; +class FieldBase { +protected: + + Column column_; + + // To fill MySQL buffers when not using key format. + uint32_t nullOffset_; + uint32_t nullBit_; + uint32_t offset_; + +#ifndef NDEBUG + // If true, only key format is available. + const bool onlyKeyFormat_; +#endif + +private: + + static FieldType getFieldType(const uint32_t serial, const bool coalescing, const Column& column); + static bool exists(const uint32_t serial, const Column& column); + +public: + + static void createFields(const uint32_t serial, const bool coalescing, Field** myFields, const Columns& columns, TableFields& fields, const ColumnIds* skippedColumnIds = NULL); + + FieldBase(Field* myField, const Column& column); + virtual ~FieldBase() { + } + const Column& getColumn() const { + return column_; + } + bool isNullable() const { + return column_.isFlagSet(COL_NULLABLE); + } + virtual bool isMapped() const = 0; + virtual uint32_t getLength(const bool keyFormat) const = 0; // Length in output buffer. + virtual uint32_t getSize() const = 0; // Size in data file. 
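+ // Columns may reserve a few per-row control bits in the partition: bit 0 is the NULL flag
+ // for nullable columns, and string columns also encode the inline length in the remaining
+ // bits (see the 'bits' argument of readPersistent()).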
+ virtual uint32_t getBits() const = 0; + virtual void skip(ByteBuffer& buffer) const = 0; + virtual void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const = 0; + virtual void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const = 0; + virtual bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const = 0; + virtual bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const = 0; + virtual void insertTransform(const uint8_t* buffer, ByteBuffer& output) const = 0; + virtual int compare(const uint8_t* left, const uint8_t* right) const = 0; + + // Used by coalescing and index alteration. + virtual void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const = 0; + virtual int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const = 0; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TableFieldsGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TableFieldsGuard { +private: + + TableFields fields_; + bool owned_; + +public: + + TableFieldsGuard() : owned_(true) { + } + + void release() { + owned_ = false; + } + + ~TableFieldsGuard() { + if (owned_) { + fields_.clearAndDestroy(); + } + } + + TableFields& get() { + return fields_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldTimestamp +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FieldTimestamp : public FieldBase { +private: + + int dec_; // Number of decimals (0 to 6) if type is MYSQL_TYPE_TIMESTAMP2 (MySQL class Field_timestampf), + // -1 if type is MYSQL_TYPE_TIMESTAMP (MySQL class Field_timestamp) + + uint64_t readMySql(const uint8_t* buffer, uint32_t offset=0) const; + void writeMySql(uint64_t data, uint8_t* buffer, uint32_t offset=0) const; + +public: + + FieldTimestamp(Field* myField, const Column& column) + : FieldBase(myField, column) { + if ( myField == 0 || myField->real_type() != MYSQL_TYPE_TIMESTAMP2 ) { + dec_ = -1; + } else { + dec_ = myField->decimals(); + } + } + bool isMapped() const override { + return true; + } + uint32_t getLength(const bool keyFormat) const override { // Length in output buffer. + // Non fractional part followed by fractional part. + switch ( dec_ ) { + case -1: + case 0: return 4; + case 1: + case 2: return 4+1; + case 3: + case 4: return 4+2; + case 5: + case 6: return 4+3; + } + return 4; + } + uint32_t getSize() const override { // Size in data file. 
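+ // Timestamps always occupy 8 bytes (a 64-bit millisecond value) in the data file,
+ // whatever the declared MySQL precision.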
+ return 8; + } + uint32_t getBits() const override { + return column_.getBits(); + } + void skip(ByteBuffer& buffer) const override { + buffer.advance(column_.getDataSize()); + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override; + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override; + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + int compare(const uint8_t* left, const uint8_t* right) const override; + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldSimple +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class FieldSimple : public FieldBase { +private: + + static T convert(const uint64_t data); + void marshall(uint8_t* buffer, const T v) const; + uint64_t unmarshall(const uint8_t* buffer) const; + +public: + + FieldSimple(Field* myField, const Column& column) + : FieldBase(myField, column) { + } + bool isMapped() const override { + return true; + } + uint32_t getLength(const bool keyFormat) const override { + return sizeof(T); + } + uint32_t getSize() const override { + return sizeof(T); + } + uint32_t getBits() const override { + return column_.getBits(); + } + void skip(ByteBuffer& buffer) const override { + buffer.advance(column_.getDataSize()); + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override; + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override; + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + static int compareSimple(const uint8_t* left, const uint8_t* right); + int compare(const uint8_t* left, const uint8_t* right) const override { + return compareSimple(left, right); + } + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +template inline T FieldSimple::convert(const uint64_t data) { + return static_cast(data); +} + +template inline void FieldSimple::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + T v; + reader >> v; + const bool isNull = (bits == 1); + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + 
marshall(buffer, v); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + marshall(buffer + offset_, v); + } + } +} + +template inline void FieldSimple::readTransient(const uint64_t data, bool isNull, uint8_t* buffer, bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + marshall(buffer, convert(data)); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + marshall(buffer + offset_, convert(data)); + } + } +} + +template inline bool FieldSimple::readMySqlTransient(const uint8_t* buffer, uint64_t& data) const { + assert(!onlyKeyFormat_); + if (isNullable() && (buffer[nullOffset_] & nullBit_)) { + return true; + } else { + data = unmarshall(buffer + offset_); + return false; + } +} + +template inline bool FieldSimple::readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const { + assert(!onlyKeyFormat_); + if (isNullable() && (buffer[nullOffset_] & nullBit_)) { + output << static_cast(0); + return true; + } else { + const uint64_t v = unmarshall(buffer + offset_); + if (sizeof(T) != sizeof(v)) { + output << static_cast(v); + } else { + output << v; + } + return false; + } +} + +template inline void FieldSimple::insertTransform(const uint8_t* buffer, ByteBuffer& output) const { + assert(!onlyKeyFormat_); + if (getColumn().isFlagSet(COL_AUTO_INC)) { + return; + } + const bool nullable = isNullable(); + if (nullable && (buffer[nullOffset_] & nullBit_)) { + output << static_cast(1); + } else { + if (nullable) { + output << static_cast(0); + } + const uint64_t data = unmarshall(buffer + offset_); + if (sizeof(T) != sizeof(data)) { + output << static_cast(data); + } else { + output << data; + } + } +} + +template inline void FieldSimple::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { + T v; + reader >> v; + buffer << v; +} + +template inline int FieldSimple::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + const bool isNull1 = (bits1 == 1); + const bool isNull2 = (bits2 == 1); + if (isNull1) { + buffer1.advance(sizeof(T)); + buffer2.advance(sizeof(T)); + if (isNull2) { + return 0; + } else { + return -1; + } + } else if (isNull2) { + buffer1.advance(sizeof(T)); + buffer2.advance(sizeof(T)); + return 1; + } + T l; + buffer1 >> l; + T r; + buffer2 >> r; + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +// Implementations for different simple types. +template<> inline void FieldSimple::marshall(uint8_t* buffer, const uint8_t v) const { + *buffer = v; +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + return static_cast(*buffer); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + const uint8_t l = *left; + const uint8_t r = *right; + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const int8_t v) const { + *buffer = static_cast(v); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + return static_cast(*buffer); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + const int8_t l = static_cast(*left); + const int8_t r = static_cast(*right); + return (l < r) ? 
-1 : (l > r) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const uint16_t v) const { + FAST_STORE2_L(buffer, v); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint16_t v; + FAST_LOAD2_L(buffer, v); + return static_cast(v); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint16_t l; + FAST_LOAD2_L(left, l); + uint16_t r; + FAST_LOAD2_L(right, r); + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const int16_t v) const { + FAST_STORE2_L(buffer, static_cast(v)); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint16_t v; + FAST_LOAD2_L(buffer, v); + return static_cast(v); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint16_t l; + FAST_LOAD2_L(left, l); + uint16_t r; + FAST_LOAD2_L(right, r); + return (static_cast(l) < static_cast(r)) ? -1 : (static_cast(l) > static_cast(r)) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const uint32_t v) const { + FAST_STORE4_L(buffer, v); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint32_t v; + FAST_LOAD4_L(buffer, v); + return static_cast(v); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint32_t l; + FAST_LOAD4_L(left, l); + uint32_t r; + FAST_LOAD4_L(right, r); + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const int32_t v) const { + FAST_STORE4_L(buffer, static_cast(v)); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint32_t v; + FAST_LOAD4_L(buffer, v); + return static_cast(v); +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint32_t l; + FAST_LOAD4_L(left, l); + uint32_t r; + FAST_LOAD4_L(right, r); + return (static_cast(l) < static_cast(r)) ? -1 : (static_cast(l) > static_cast(r)) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const uint64_t v) const { + FAST_STORE8_L(buffer, v); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint64_t v; + FAST_LOAD8_L(buffer, v); + return v; +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint64_t l; + FAST_LOAD8_L(left, l); + uint64_t r; + FAST_LOAD8_L(right, r); + return (l < r) ? -1 : (l > r) ? 1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const int64_t v) const { + FAST_STORE8_L(buffer, static_cast(v)); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint64_t v; + FAST_LOAD8_L(buffer, v); + return v; +} + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint64_t l; + FAST_LOAD8_L(left, l); + uint64_t r; + FAST_LOAD8_L(right, r); + return (static_cast(l) < static_cast(r)) ? -1 : (static_cast(l) > static_cast(r)) ? 
1 : 0; +} + +template<> inline void FieldSimple::marshall(uint8_t* buffer, const double v) const { + const uint64_t l = *reinterpret_cast(&v); + FAST_STORE8_L(buffer, l); +} + +template<> inline uint64_t FieldSimple::unmarshall(const uint8_t* buffer) const { + uint64_t v; + FAST_LOAD8_L(buffer, v); + return v; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +// gcc 8.5.0 emits the following warning on variable dl, but i haven't been able to figure out why. +// Running the same code in another test program does not generate this warning. Is this warning +// a gcc bug ? +// warning: 'l' is used uninitialized [-Wuninitialized] +// const double dl = *reinterpret_cast(&l); +#endif + +// STATIC +template<> inline int FieldSimple::compareSimple(const uint8_t* left, const uint8_t* right) { + uint64_t l = 0; + FAST_LOAD8_L(left, l); + uint64_t r(0); + FAST_LOAD8_L(right, r); + const double dl = *reinterpret_cast(&l); + const double dr = *reinterpret_cast(&r); + return (dl < dr) ? -1 : (dl > dr ? 1 : 0); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + + +template<> inline double FieldSimple::convert(const uint64_t data) { + return *reinterpret_cast(&data); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FieldString : public FieldBase { +private: + + CHARSET_INFO* cs_; + uint32_t lengthBytes_; + uint32_t fieldLength_; + +private: + + void store(uint8_t* buffer, const bool keyFormat, const uint8_t* data, const uint16_t length) const; + +public: + + FieldString(Field* myField, const Column& column) + : FieldBase(myField, column) { + if (myField == 0) { + cs_ = get_charset_by_name(column.getCharset().c_str(), MYF(MY_WME)); + lengthBytes_ = 0; + fieldLength_ = 0; + } else { + cs_ = const_cast(myField->charset()); + lengthBytes_ = static_cast(myField)->get_length_bytes(); + fieldLength_ = myField->field_length; + } + } + bool isMapped() const override { + return true; + } + uint32_t getLength(const bool keyFormat) const override { + return (keyFormat ? 
2 : lengthBytes_) + fieldLength_; + } + uint32_t getSize() const override { + return 16; + } + uint32_t getBits() const override { + return column_.getBits(); + } + void skip(ByteBuffer& buffer) const override { + buffer.advance(column_.getDataSize()); + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override { + assert(0); + return false; + } + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override { + assert(0); + return false; + } + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + int compare(const uint8_t* left, const uint8_t* right) const override; + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldSkip +////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class FieldSkip : public FieldBase { +public: + + FieldSkip(const Column& column) + : FieldBase(0, column) { + } + bool isMapped() const override { + return false; + } + uint32_t getLength(const bool keyFormat) const override { + return S; + } + uint32_t getSize() const override { + return S; + } + uint32_t getBits() const override { + return column_.getBits(); + } + void skip(ByteBuffer& buffer) const override { + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override; + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override; + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + int compare(const uint8_t* left, const uint8_t* right) const override; + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +template inline void FieldSkip::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + reader.advance(S); +} + +template inline void FieldSkip::readTransient(const uint64_t data, bool isNull, uint8_t* buffer, bool keyFormat) const { +} + +template inline bool FieldSkip::readMySqlTransient(const uint8_t* buffer, uint64_t& data) const { + assert(0); + return false; +} + +template inline bool FieldSkip::readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const { + output.advance(sizeof(S)); + return false; +} + +template inline void FieldSkip::insertTransform(const uint8_t* buffer, 
ByteBuffer& output) const { + assert(0); +} + +template inline int FieldSkip::compare(const uint8_t* left, const uint8_t* right) const { + return 0; +} + +template inline void FieldSkip::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { +} + +template inline int FieldSkip::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + assert(0); + return 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldDefault +////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef SYSarray BinValue; +template class FieldDefault : public FieldBase { +private: + BinValue value_; + +private: + + void computeBinDefaultValue(Field* myField); + void marshall(uint8_t* buffer, const bool keyFormat) const; + +public: + + FieldDefault(Field* myField, const Column& column, const bool forceNull) + : FieldBase(myField, column) { + if (!forceNull) { + computeBinDefaultValue(myField); + } + } + bool isMapped() const override { + return true; + } + uint32_t getLength(const bool keyFormat) const override { + return sizeof(T); + } + uint32_t getSize() const override { + return 0; + } + uint32_t getBits() const override { + return 0; + } + void skip(ByteBuffer& buffer) const override { + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override; + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override; + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + int compare(const uint8_t* left, const uint8_t* right) const override; + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +template inline void FieldDefault::marshall(uint8_t* buffer, const bool keyFormat) const { + const uint8_t* p = value_.data(); + const uint32_t length = value_.length(); + const bool isNull = length == 0; + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + memcpy(buffer, p, length); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + buffer += offset_; + memcpy(buffer, p, length); + } + } +} + +template inline void FieldDefault::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + assert(bits == 0); + marshall(buffer, keyFormat); +} + +template inline void FieldDefault::readTransient(const uint64_t data, bool isNull, uint8_t* buffer, bool keyFormat) const { + assert(keyFormat || !onlyKeyFormat_); + marshall(buffer, keyFormat); +} + +template inline bool FieldDefault::readMySqlTransient(const uint8_t* buffer, uint64_t& data) const { + assert(0); + return false; +} + +template 
inline bool FieldDefault::readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const { + assert(0); + return false; +} + +template inline void FieldDefault::insertTransform(const uint8_t* buffer, ByteBuffer& output) const { + assert(0); +} + +template inline void FieldDefault::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { +} + +template inline int FieldDefault::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + assert(0); + return 0; +} + +template inline int FieldDefault::compare(const uint8_t* left, const uint8_t* right) const { + return FieldSimple::compareSimple(left, right); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// FieldDefaultString +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class FieldDefaultString : public FieldBase { +private: + + CHARSET_INFO* cs_; + uint32_t lengthBytes_; + uint32_t fieldLength_; + BinValue value_; + +private: + + void computeBinDefaultValue(Field* myField); + void marshall(uint8_t* buffer, const bool keyFormat) const; + +public: + + FieldDefaultString(Field* myField, const Column& column, const bool forceNull) + : FieldBase(myField, column) { + if (myField == 0) { + cs_ = get_charset_by_name(column.getCharset().c_str(), MYF(MY_WME)); + lengthBytes_ = 0; + fieldLength_ = 0; + } else { + cs_ = const_cast(myField->charset()); + lengthBytes_ = static_cast(myField)->get_length_bytes(); + fieldLength_ = myField->field_length; + } + if (!forceNull) { + computeBinDefaultValue(myField); + } + } + bool isMapped() const override { + return true; + } + uint32_t getLength(const bool keyFormat) const override { + return (keyFormat ? 
2 : lengthBytes_) + fieldLength_; + } + uint32_t getSize() const override { + return 0; + } + uint32_t getBits() const override { + return 0; + } + void skip(ByteBuffer& buffer) const override { + } + void readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const override; + void readTransient(const uint64_t data, const bool isNull, uint8_t* buffer, const bool keyFormat) const override; + bool readMySqlTransient(const uint8_t* buffer, uint64_t& data) const override; + bool readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const override; + void insertTransform(const uint8_t* buffer, ByteBuffer& output) const override; + int compare(const uint8_t* left, const uint8_t* right) const override; + void copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const override; + int compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* buffer) const override; +}; + +inline void FieldDefaultString::marshall(uint8_t* buffer, const bool keyFormat) const { + const uint8_t* p = value_.data(); + const uint32_t length = value_.length(); + const bool isNull = length == 0; + if (keyFormat) { + if (isNull) { + *buffer = 1; + } else { + if (isNullable()) { + *buffer++ = 0; + } + memcpy(buffer, p, length); + } + } else { + if (isNull) { + buffer[nullOffset_] |= nullBit_; + } else { + buffer += offset_; + if (lengthBytes_ == 2) { + // Binary value is built with length on 2 bytes. + memcpy(buffer, p, length); + } else { + uint16_t n; + FAST_LOAD2_L(p, n); + *buffer++ = static_cast(n); + memcpy(buffer, p + 2, length - 2); + } + } + } +} + +inline void FieldDefaultString::readPersistent(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, uint8_t* buffer, const bool keyFormat) const { + marshall(buffer, keyFormat); +} + +inline void FieldDefaultString::readTransient(const uint64_t data, bool isNull, uint8_t* buffer, bool keyFormat) const { + marshall(buffer, keyFormat); +} + +inline bool FieldDefaultString::readMySqlTransient(const uint8_t* buffer, uint64_t& data) const { + assert(0); + return false; +} + +inline bool FieldDefaultString::readMySqlPersistent(const uint8_t* buffer, ByteBuffer& output) const { + assert(0); + return false; +} + +inline void FieldDefaultString::insertTransform(const uint8_t* buffer, ByteBuffer& output) const { + assert(0); +} + +inline int FieldDefaultString::compare(const uint8_t* left, const uint8_t* right) const { + uint16_t leftLength; + FAST_LOAD2_L(left, leftLength); + left += 2; + uint16_t rightLength; + FAST_LOAD2_L(right, rightLength); + right += 2; + return cs_->coll->strnncollsp(cs_, left, leftLength, right, rightLength); +} + +inline void FieldDefaultString::copy(PartitionReader& reader, PartitionReader& stringReader, const uint8_t bits, ByteBuffer& buffer, BinBuffer* binBuffer) const { +} + +inline int FieldDefaultString::compare(ByteBuffer& buffer1, const uint8_t bits1, PartitionReader& stringReader1, + ByteBuffer& buffer2, const uint8_t bits2, PartitionReader& stringReader2, BinBuffer* binBuffer) const { + assert(0); + return 0; +} + +} + +#endif /* #ifndef _handler_field_h_ */ diff --git a/storage/sparrow/handler/hasparrow.cc b/storage/sparrow/handler/hasparrow.cc new file mode 100644 index 000000000000..9766b43f63a4 --- /dev/null +++ b/storage/sparrow/handler/hasparrow.cc @@ -0,0 
+1,2156 @@ +/* + Sparrow handler. +*/ + +#include "hasparrow.h" + +#include "../engine/internalapi.h" +#include "../engine/persistent.h" +#include "../engine/scheduler.h" +#include "../engine/listener.h" +#include "../engine/fileutil.h" +#include "../engine/cache.h" +#include "../engine/alter.h" +#include "../engine/purge.h" +#include "../engine/coalescing.h" +#include "../dns/dns.h" + +//#include +#include "sql/current_thd.h" +#include "sql/sql_show.h" +#include "sql/sql_lex.h" +#include "sql/create_field.h" +#include "sql/mysqld.h" + +#include "../engine/log.h" + +// #ifdef __GNUC__ +// #pragma GCC diagnostic ignored "-Wunused-parameter" +// #endif + + +namespace Sparrow { + +SparrowStatus SparrowStatus::status_; +bool SparrowHandler::initialized_ = false; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowHandler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +static const char* HA_SPARROW_EXTS[]= { + "", // for directories + ".SPM", + ".SPI", + ".SPD", + ".SPS", + NullS +}; + + +// STATIC +int SparrowHandler::initialize(void* p) { + SPARROW_ENTER("SparrowHandler::initialize"); + + Lock::initializeStatics(); + RWLock::initializeStatics(); + Cond::initializeStatics(); + + // Initialize handlerton. + // handlerton is a singleton structure - one instance per storage engine - + // to provide access to storage engine functionality that works on the + // "global" level (unlike handler class that works on a per-table basis). + + handlerton* hton = static_cast(p); + hton->state = SHOW_OPTION_YES; + hton->db_type = static_cast(DB_TYPE_SPARROW); + hton->create = &SparrowHandler::create; + hton->close_connection = SparrowHandler::closeConnection; + hton->drop_database = SparrowHandler::dropDatabase; + hton->panic = SparrowHandler::panic; + hton->show_status = SparrowHandler::showStatus; + hton->flags = HTON_TEMPORARY_NOT_SUPPORTED | HTON_NO_PARTITION; + // TODO: Review this declaration when working on the schema alteration. + //hton->alter_table_flags = SparrowHandler::alterTableFlags; + hton->file_extensions = HA_SPARROW_EXTS; + + try { + Master::initialize(); + + // Initialize socket util. + SocketUtil::initialize(); + + // Initialize file util. + FileUtil::initialize(); + + // Initialize caches. + FileCache::initialize(); + BlockCache::initialize(); + + // Initialize worker and writer threads. + Worker::initialize(); + Flush::initialize(); + Writer::initialize(); + DnsWorker::initialize(); + ApiWorker::initialize(); + + // Initialize coalescing threads. + CoalescingWorker::initialize(); + + // Start misc other background threads + start_slave_threads(); + + // Release thread local storage. + IOContext::destroy(); + } catch(const SparrowException& e) { + e.toLog(); + return ER_UNKNOWN_ERROR; + } + return 0; +} + + +int SparrowHandler::start_slave_threads() { + SPARROW_ENTER("SparrowHandler::start_slave_threads"); + try { + // Initialize scheduler. + Scheduler::initialize(); // THD thread + + // Initialize DNS service. + Dns::initialize(); // THD thread + + // Initialize purge thread. + Purge::initialize(); // THD thread + + // Find all master files. + InternalApi::setup(); + + // Initialize listener. 
+ Listener::initialize(); // THD thread + + initialized_ = true; + + } catch(const SparrowException& e) { + e.toLog(); + return ER_UNKNOWN_ERROR; + } + return 0; +} + + +// STATIC +int SparrowHandler::stop_slave_threads() { + if ( !initialized_ ) + return 0; + + SPARROW_ENTER("SparrowHandler::stop_slave_threads"); + spw_print_information("Shutting down Sparrow, phase 1..."); + + uint64_t t = my_micro_time(); + + InternalApi::StopCoalescingTasks(); + + // Shutdown listener. + Listener::shutdown(); + uint64_t now = my_micro_time(); + Str duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down listener thread in %s", duration.c_str()); + t = now; + + // Force flush of transient partitions. + if (!sparrow_quick_shutdown) { + InternalApi::flushAll(true); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Flushed partitions in %s", duration.c_str()); + t = now; + } + Flush::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down flush threads in %s", duration.c_str()); + t = now; + + // Shutdown scheduler. + Scheduler::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down scheduler thread in %s", duration.c_str()); + t = now; + + // Shutdown DNS service. + Dns::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down DNS threads in %s", duration.c_str()); + t = now; + + DnsWorker::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down DNS worker threads in %s", duration.c_str()); + t = now; + + AlterWorker::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down alter worker threads in %s", duration.c_str()); + t = now; + + ApiWorker::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down API worker threads in %s", duration.c_str()); + t = now; + + // Shutdown coalescing threads. + CoalescingWorker::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down coalescing threads in %s", duration.c_str()); + t = now; + + // Shutdown purge thread. + Purge::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down purge thread in %s", duration.c_str()); + t = now; + + // Shutdown worker and writer threads. + Worker::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down worker threads in %s", duration.c_str()); + t = now; + + Writer::shutdown(); + now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + spw_print_information("Shut down writer threads in %s", duration.c_str()); + + return 0; +} + + +// STATIC +int SparrowHandler::deinitialize(void* p) { + SPARROW_ENTER("SparrowHandler::deinitialize"); + spw_print_information("Shutting down Sparrow, phase 2..."); + +#ifndef NDEBUG + uint64_t t = my_micro_time(); +#endif + + Str duration; + + // Close opened files. 
+ FileCache::get().clear(); +#ifndef NDEBUG + uint64_t now = my_micro_time(); + duration = Str::fromDuration((now - t) / 1000); + DBUG_PRINT("sparrow_handler", ("Clear file cache in %s", duration.c_str())); + t = now; +#endif + + Lock::deinitializeStatics(); + RWLock::deinitializeStatics(); + Cond::deinitializeStatics(); + + spw_print_information("Sparrow shutdown complete."); + return 0; +} + +// STATIC +// Creates handler object for the table in the storage engine. +handler* SparrowHandler::create(handlerton* hton, TABLE_SHARE* table, bool partitioned, MEM_ROOT* mem_root) { + SPARROW_ENTER("SparrowHandler::create"); + return new (mem_root) SparrowHandler(hton, table); +} + +// STATIC +int SparrowHandler::closeConnection(handlerton* hton, THD* thd) { + SPARROW_ENTER("SparrowHandler::closeConnection"); + IOContext::destroy(); + return 0; +} + +// STATIC +int SparrowHandler::panic(handlerton* hton, enum ha_panic_function flag) { + SPARROW_ENTER("SparrowHandler::panic"); + // Nothing to do. + return 0; +} + +// STATIC +bool SparrowHandler::showStatus(handlerton* hton, THD* thd, stat_print_fn* stat_print, enum ha_stat_type stat_type) { + SPARROW_ENTER("SparrowHandler::showStatus"); + PrintBuffer buffer; + buffer << Str::fromTimestamp(Scheduler::now()) << " - Sparrow Engine Status\n\n"; + buffer << "Uptime: " << Str::fromDuration(Scheduler::uptime()) << "\n"; + FileUtil::report(buffer); + InternalApi::report(buffer); + return stat_print(thd, SPARROW_ENGINE_NAME, + static_cast(strlen(SPARROW_ENGINE_NAME)), "", 0, + reinterpret_cast(buffer.getData()), static_cast(buffer.position())); +} + +// STATIC +void SparrowHandler::dropDatabase(handlerton* hton, char* path) { + SPARROW_ENTER("SparrowHandler::dropDatabase"); +} + +SparrowHandler::SparrowHandler(handlerton* hton, TABLE_SHARE* table) + : handler(hton, table) { + SPARROW_ENTER("SparrowHandler::SparrowHandler"); + share_ = 0; +} + +SparrowHandler::~SparrowHandler() { + close(); +} + +const char* SparrowHandler::table_type() const { + SPARROW_ENTER("SparrowHandler::table_type"); + return SPARROW_ENGINE_NAME; +} + +//const char** SparrowHandler::bas_ext() const { +// SPARROW_ENTER("SparrowHandler::bas_ext"); +// return HA_SPARROW_EXTS; +//} + +/*int SparrowHandler::backup(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::backup"); + return HA_ADMIN_NOT_IMPLEMENTED; +} + +int SparrowHandler::restore(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::restore"); + return HA_ADMIN_NOT_IMPLEMENTED; +}*/ + +handler::Table_flags SparrowHandler::table_flags() const { + SPARROW_ENTER("SparrowHandler::table_flags"); + return HA_NO_TRANSACTIONS + | HA_PARTIAL_COLUMN_READ + | HA_STATS_RECORDS_IS_EXACT + | HA_COUNT_ROWS_INSTANT + | HA_NO_BLOBS + | HA_FILE_BASED + | HA_NULL_IN_KEY + | HA_AUTO_PART_KEY + | HA_CAN_SQL_HANDLER + //| HA_REC_NOT_IN_SEQ // Seems to have been removed. 
See sql/handler.h:222 + | HA_ANY_INDEX_MAY_BE_UNIQUE + | HA_BINLOG_FLAGS + | HA_NO_COPY_ON_ALTER + | HA_CAN_REPAIR; +} + +// STATIC +uint SparrowHandler::alterTableFlags(uint flags) { + SPARROW_ENTER("SparrowHandler::alterTableFlags"); + return 0; +} + +/*const char* SparrowHandler::index_type(uint inx) { + SPARROW_ENTER("SparrowHandler::index_type"); + return "Default"; +}*/ + +enum ha_key_alg SparrowHandler::get_default_index_algorithm() const +{ + SPARROW_ENTER("SparrowHandler::get_default_index_algorithm"); + return HA_KEY_ALG_SE_SPECIFIC; +} +bool SparrowHandler::is_index_algorithm_supported(enum ha_key_alg key_alg) const { + return key_alg == HA_KEY_ALG_SE_SPECIFIC; +} + + + + +ulong SparrowHandler::index_flags(uint inx, uint part, bool all_parts) const { + SPARROW_ENTER("SparrowHandler::index_flags"); + return HA_READ_NEXT + | HA_READ_PREV + | HA_KEYREAD_ONLY + | HA_READ_ORDER + | HA_READ_RANGE; +} + +int SparrowHandler::info(uint flag) { + SPARROW_ENTER("SparrowHandler::info"); + context_.getStats(getStats(), flag); + return 0; +} + +int SparrowHandler::analyze(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::analyze"); + return HA_ADMIN_NOT_IMPLEMENTED; +} + +int SparrowHandler::check(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::check"); + return HA_ADMIN_NOT_IMPLEMENTED; +} + +int SparrowHandler::optimize(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::optimize"); + return HA_ADMIN_NOT_IMPLEMENTED; +} + +int SparrowHandler::repair(THD* thd, HA_CHECK_OPT* checkOpt) { + SPARROW_ENTER("SparrowHandler::repair"); + Master& master = share_->getMaster(); + master.repair(); + return 0; +} + + +// Analyze the modifications to the columns. Try to create columns with the changes. +// If there are incoherences or any issue during the creation, an exception will be thrown and +// caught by mysql alteration engine. +int SparrowHandler::check_alterations(Alter_info* info) { + SPARROW_ENTER("SparrowHandler::check_alterations"); + const uint alterFlags = info->flags; + ReadGuard guard(share_->getMaster().getLock()); + try { + if (alterFlags & Alter_info::ALTER_ADD_COLUMN) { + List_iterator iterator(info->create_list); + Create_field* field; + while ((field = iterator++) != 0) { + if (field->after == first_keyword) { + throw SparrowException::create(false, "cannot add column `%s` at first position because table timestamp must remain the first column", + field->field_name); + } + createColumn(*field); // Check column creation. + } + } + if (alterFlags & Alter_info::ALTER_DROP_COLUMN) { + for (const auto drop : info->drop_list) { + const Str name(drop->name, false); + const uint32_t pos = share_->getMaster().getColumn(name); + if (pos == 0) { + throw SparrowException::create(false, "cannot drop timestamp column `%s`", name.c_str()); + } + const Indexes& indexes = share_->getMaster().getIndexes(); + for (uint32_t i = 0; i < indexes.length(); ++i) { + const Index& index = indexes[i]; + if (!index.isDropped() && index.getColumnIds().contains(pos)) { + throw SparrowException::create(false, "cannot drop column `%s` because it is used by index `%s`", name.c_str(), index.getName().c_str()); + } + } + } + } + if (alterFlags & Alter_info::ALTER_CHANGE_COLUMN) { + List_iterator iterator(info->create_list); + Create_field* field; + while ((field = iterator++) != 0) { + createColumn(*field); // Check column creation. 
+ } + } + return 0; + } catch(const SparrowException& e) { + Str s("this operation: "); + s += Str(e.getText()); + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), s.c_str()); + return 1; + } +} + +// test_if_locked is a list of flags; see include/my_base.h:42 +// mode indicates how to open the file. Example: O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, ... See examples in other storage engines. +//int SparrowHandler::open(const char* name, int mode, uint options) { +int SparrowHandler::open(const char *name, int mode, uint test_if_locked, const dd::Table *table_def) { + SPARROW_ENTER("SparrowHandler::open"); + const Str databaseName = FileUtil::getDatabaseName(name); + const Str tableName = FileUtil::getTableName(name); + DBUG_PRINT("sparrow_handler", ("Open table %s.%s with mode %d", databaseName.c_str(), tableName.c_str(), mode)); + try { + share_ = TableShare::acquire(databaseName, tableName, table, &lockData_); + context_.initialize(table, share_); + return 0; + } catch(const SparrowException& e) { + spw_print_error("Sparrow: Cannot open table %s.%s: %s", databaseName.c_str(), tableName.c_str(), e.getText()); + return HA_ERR_INTERNAL_ERROR; + } +} + +handler* SparrowHandler::clone(const char* name, MEM_ROOT* mem_root) { + // Do the same as the default implementation, but clone also the current context. + SparrowHandler* handler = static_cast(handler::clone(name, mem_root)); + handler->context_.clone(context_); + return handler; +} + +int SparrowHandler::close() { + SPARROW_ENTER("SparrowHandler::close"); + if (share_ != 0) { + DBUG_PRINT("sparrow_handler", ("Close table %s.%s", share_->getDatabaseName().c_str(), share_->getTableName().c_str())); + TableShare::release(share_); + share_ = 0; + } + context_.reset(); + return 0; +} + +int SparrowHandler::delete_table(const char *name, const dd::Table *table_def) { + SPARROW_ENTER("SparrowHandler::delete_table"); + DBUG_PRINT("sparrow_handler", ("Delete table %s", name)); + + const Str database = FileUtil::getDatabaseName(name); + const Str table = FileUtil::getTableName(name); + try { + // Get the master file and remove it from the hash so it is no longer accessible from the API. + MasterGuard master = InternalApi::get(database.c_str(), table.c_str(), false, true, 0); + master->prepareForDeletion(); + Atomic::inc32(&SparrowStatus::get().ddlSerial_); + return 0; + } catch(const SparrowException& e) { + spw_print_error("Sparrow: Cannot delete table %s.%s: %s", database.c_str(), table.c_str(), e.getText()); + return HA_ERR_INTERNAL_ERROR; + } +} + +int SparrowHandler::rename_table(const char *from, const char *to, const dd::Table *from_table_def, dd::Table *to_table_def) { + SPARROW_ENTER("SparrowHandler::rename_table "); + DBUG_PRINT("sparrow_handler", ("Rename table %s to %s", from, to)); + const Str fromDatabaseName = FileUtil::getDatabaseName(from); + const Str fromTableName = FileUtil::getTableName(from); + const Str toDatabaseName = FileUtil::getDatabaseName(to); + const Str toTableName = FileUtil::getTableName(to); + try { + InternalApi::rename(fromDatabaseName.c_str(), fromTableName.c_str(), + toDatabaseName.c_str(), toTableName.c_str()); + Atomic::inc32(&SparrowStatus::get().ddlSerial_); + return 0; + } catch(const SparrowException& e) { + spw_print_error("Sparrow: Cannot rename table %s.%s to %s.%s: %s", fromDatabaseName.c_str(), fromTableName.c_str(), + toDatabaseName.c_str(), toTableName.c_str(), e.getText()); + return HA_ERR_INTERNAL_ERROR; + } +} + +// Create new table. 
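+// The checks in create() and createColumn() below are easier to follow with a
+// concrete DDL statement in mind. The statement is purely illustrative (table,
+// column and index names are made up, and the ENGINE name is assumed), but it
+// satisfies every constraint enforced here: the first column is a non-nullable
+// TIMESTAMP, string columns use a utf8mb4 character set, binary data goes
+// through VARBINARY (binary charset), and AUTO_INCREMENT is only accepted on a
+// NOT NULL BIGINT column.
+//
+//   CREATE TABLE flows (
+//     ts       TIMESTAMP(3) NOT NULL,
+//     src_ip   VARCHAR(64) CHARACTER SET utf8mb4,
+//     bytes    BIGINT UNSIGNED,
+//     flow_id  BIGINT NOT NULL AUTO_INCREMENT,
+//     payload  VARBINARY(255),
+//     KEY k_ts (ts),
+//     KEY k_src (src_ip)
+//   ) ENGINE=SPARROW;
+//
+// Unsupported column types (DATE, DATETIME, DECIMAL, TEXT/BLOB, ENUM, ...), a
+// nullable or non-TIMESTAMP first column, AUTO_INCREMENT on any other type, or
+// a duplicate index definition all raise a SparrowException before anything is
+// written to disk.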
+int SparrowHandler::create(const char *name, TABLE *table, HA_CREATE_INFO *create_info, dd::Table *table_def) { + SPARROW_ENTER("SparrowHandler::create"); + const Str databaseName = FileUtil::getDatabaseName(name); + const Str tableName = FileUtil::getTableName(name); + DBUG_PRINT("sparrow_handler", ("Create table %s.%s", databaseName.c_str(), tableName.c_str())); + try { + MasterGuard master = InternalApi::get(databaseName.c_str(), tableName.c_str(), true, false, table->s); + WriteGuard guard(master->getLock()); + + // Get columns. + restore_record(table, s->default_values); + BitmapGuard bitmapGuard(table); + const uint nbColumns = table->s->fields; + Columns columns(nbColumns); + for (uint i = 0; i < nbColumns; ++i) { + const Column column = createColumn(*table->field[i]); + + // First column must be a non-null timestamp. + if (i == 0 && (column.getType() != COL_TIMESTAMP || column.isFlagSet(COL_NULLABLE))) { + throw SparrowException::create(false, "first column `%s` must be a non-null timestamp", column.getName().c_str()); + } + columns.append(column); + } + + // Get indexes. + const uint nbIndexes = table->s->keys; + Indexes indexes(nbIndexes); + indexes.forceLength(nbIndexes); + IndexMappings indexMappings(nbIndexes); + indexMappings.forceLength(nbIndexes); + for (uint i = 0; i < nbIndexes; ++i) { + const KEY& index = table->key_info[i]; + indexMappings[i] = i; + const uint indexColumns = index.user_defined_key_parts; + ColumnIds columnIds(indexColumns); + for (uint j = 0; j < indexColumns; ++j) { + const Field& field = *index.key_part[j].field; + columnIds.append(static_cast(field.field_index())); + } + const bool unique = (index.flags & HA_NOSAME) != 0; + const Index newIndex(index.name, columnIds, unique); + const uint32_t check = indexes.index(newIndex); + if (check != SYS_NPOS) { + throw SparrowException::create(false, "index %s is a duplicate of index %s", index.name, indexes[check].getName().c_str()); + } + indexes[i] = newIndex; + } + + // Get foreign keys. + THD* thd = ha_thd(); + const String query(thd->normalized_query()); + const Str sql(query.ptr(), static_cast(query.length())); + const ForeignKeys foreignKeys = getForeignKeys(sql.c_str(), master->getDatabase(), master->getTable(), table); + + // Setup table definition. + master->setColumns(columns); + master->setIndexes(indexes); + master->setIndexMappings(indexMappings); + master->setForeignKeys(foreignKeys); + master->toDisk(); + Atomic::inc32(&SparrowStatus::get().ddlSerial_); + return 0; + } catch(const SparrowException& e) { + spw_print_error("Sparrow: Cannot create table %s.%s: %s", databaseName.c_str(), tableName.c_str(), e.getText()); + return -1; + } + return 0; +} + +// STATIC +Column SparrowHandler::createColumn(Field& field) _THROW_(SparrowException) { + // Read default value. 
+ const enum_field_types fieldType = field.type(); + const uint32_t fieldFlags = field.all_flags(); + CHARSET_INFO* charset = const_cast(field.charset()); + const bool hasDefaultValue = !field.is_null() + && fieldType != FIELD_TYPE_BLOB + && !(fieldFlags & NO_DEFAULT_VALUE_FLAG) + && ((field.auto_flags & Field::NEXT_NUMBER) == 0); + char tmp[MAX_FIELD_WIDTH]; + Str defaultValue; + if (hasDefaultValue) { + String s(tmp, sizeof(tmp), charset); + field.val_str(&s); + defaultValue = Str(s.c_ptr(), false); + } + uint decimals = field.decimals(); + return createColumn(field.field_name, fieldType, decimals, fieldFlags, charset, defaultValue); +} + +// STATIC +Column SparrowHandler::createColumn(Create_field& field) _THROW_(SparrowException) { + // Read default value. + Str defaultValue; + String tmp; + String* s = field.constant_default == nullptr ? nullptr : field.constant_default->val_str(&tmp); + if (s != nullptr) { + defaultValue = Str(s->c_ptr(), static_cast(s->length())); + } + uint decimals = field.decimals; + return createColumn(field.field_name, field.sql_type, decimals, field.flags, field.charset == 0 ? &my_charset_utf8mb4_bin : field.charset, defaultValue); +} + +// STATIC +Column SparrowHandler::createColumn(const char* name, const enum_field_types fieldType, const uint decimals, const uint32_t fieldFlags, + const CHARSET_INFO* charset, const Str& defaultValue) _THROW_(SparrowException) { + assert(charset != 0); + ColumnType type = COL_UNKNOWN; + switch (fieldType) { + case MYSQL_TYPE_TINY: type = COL_BYTE; break; + case MYSQL_TYPE_SHORT: type = COL_SHORT; break; + case MYSQL_TYPE_LONG: type = COL_INT; break; + case MYSQL_TYPE_DOUBLE: type = COL_DOUBLE; break; + case MYSQL_TYPE_TIMESTAMP2: + case MYSQL_TYPE_TIMESTAMP: type = COL_TIMESTAMP; break; + case MYSQL_TYPE_LONGLONG: type = COL_LONG; break; + case MYSQL_TYPE_VARCHAR: type = charset == &my_charset_bin ? COL_BLOB : COL_STRING; break; + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_DECIMAL: + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_NULL: + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_TIME: + case MYSQL_TYPE_DATETIME: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_NEWDECIMAL: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_SET: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_GEOMETRY: + default : break; + } + if (type == COL_UNKNOWN) { + throw SparrowException::create(false, "unknown type %d for column `%s`", static_cast(fieldType), name); + } + + // Strings must be UTF-8. All UTF-8 collations are allowed, so we check + // only the character set. + if (type == COL_STRING && strcmp(charset->csname, "utf8mb4") != 0) { + throw SparrowException::create(false, "string column `%s` must be UTF-8", name); + } + + // Blobs must be binary. + if (type == COL_BLOB && charset != &my_charset_bin) { + throw SparrowException::create(false, "blob column `%s` must have binary charset", name); + } + + // Ignore unsigned flag on timestamps (set by MySQL). + const uint32_t flags = (((fieldFlags & NOT_NULL_FLAG) == 0) ? COL_NULLABLE : 0) + | (((fieldFlags & AUTO_INCREMENT_FLAG) != 0) ? COL_AUTO_INC : 0) + | ((type != COL_TIMESTAMP && (fieldFlags & UNSIGNED_FLAG) != 0) ? 
COL_UNSIGNED : 0); + + if ((flags & COL_AUTO_INC) != 0) { + if (type != COL_LONG) { + throw SparrowException::create(false, "column `%s` cannot be auto incremental; only longs can be", name); + } + if ((flags & COL_NULLABLE) != 0) { + throw SparrowException::create(false, "column `%s` is auto incremental and cannot be nullable", name); + } + } + const uint32_t info = (type == COL_TIMESTAMP ? decimals : 0); + return Column(name, type, flags, info, charset->csname, defaultValue); +} + +// Utility method to remove comments from the given SQL statement. +// STATIC +Str SparrowHandler::stripComments(const char* sql) +{ + const char* sptr; + char* ptr; + // Unclosed quote character (0 if none). + char quote = 0; + char* str = new char[strlen(sql)+1]; + strcpy(str, sql); + sptr = sql; + ptr = str; + for ( ; ; ) { + if (*sptr == '\0') { + assert(ptr <= str + strlen(sql)); + *ptr = '\0'; + break; + } + bool moveNext = true; + if (*sptr == quote) { + // Closing quote character: do not look for starting quote or comments. + quote = 0; + } else if (quote) { + // Within quotes: do not look for starting quotes or comments. + } else if (*sptr == '"' || *sptr == '`') { + // Starting quote: remember the quote character. + quote = *sptr; + } else if (*sptr == '#' || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { + for ( ; ; ) { + // In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A. + if (*sptr == (char)0x0A || *sptr == (char)0x0D || *sptr == '\0') { + moveNext = false; + } else { + sptr++; + } + } + } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + for ( ; ; ) { + if (*sptr == '*' && *(sptr + 1) == '/') { + sptr += 2; + moveNext = false; + } else if (*sptr == '\0') { + moveNext = false; + } else { + sptr++; + } + } + } + if (moveNext) { + *ptr = *sptr; + ptr++; + sptr++; + } + } + return Str(str); +} + +// Gets the foreign keys of a table, given its create SQL statement and its column definitions. +// STATIC +ForeignKeys SparrowHandler::getForeignKeys(const char* sql, const Str& databaseName, + const Str& tableName, TABLE* table) { + // Find strings like "`fk col` REFERENCES `db`.`tbl` (`col`)". + ForeignKeys foreignKeys; + const char* tokens[] = { "REFERENCES", 0, "(", 0, ")" }; + Str identifiers[4]; + const char* s = moveTo(sql, "("); + if (s == 0) { + return foreignKeys; + } + const char* start = s; + int n = 0; + for ( ; ; ) { + int id = 0; + for (int i = 0; i < static_cast(sizeof(tokens) / sizeof(tokens[0])); ++i) { + const char* token = tokens[i]; + bool hasDot = false; + if (token == 0) { + s = getIdentifier(s, identifiers[id++], hasDot); + if (hasDot) { + s = getIdentifier(s, identifiers[id++], hasDot); + } + } else { + s = moveTo(s, token); + } + if (s == 0) { + break; + } + if (i == 0) { + // Move back to get column identifier. + const char* save = s; + while (s >= start && *s != ',') { + s--; + } + s = moveTo(s, ","); + if (s == 0) { + break; + } + s = getIdentifier(s, identifiers[id++], hasDot); + if (s == 0) { + break; + } + s = save; + } + } + if (id != 3 && id != 4) { + break; + } + for (uint32_t columnId = 0; columnId < table->s->fields; ++columnId) { + if (my_strcasecmp(&my_charset_latin1, table->field[columnId]->field_name, identifiers[0].c_str()) == 0) { + const Str& dbName = (id == 3 ? databaseName : identifiers[1]); + const Str& tblName = (id == 3 ? identifiers[1] : identifiers[2]); + const Str& colName = (id == 3 ? 
identifiers[2] : identifiers[3]); + char buffer[2048]; + snprintf(buffer, sizeof(buffer), "FK_%s_%s_%d", databaseName.c_str(), + tableName.c_str(), n++); + foreignKeys.append(ForeignKey(buffer, static_cast(columnId), dbName, tblName, colName)); + } + } + } + return foreignKeys; +} + +// Utility method to parse strings: move s to the next occurence of keyword (case-insensitive), +// and skip whitespaces and newlines after. Returns 0 if not found. +// STATIC +const char* SparrowHandler::moveTo(const char* s, const char* keyword) { + size_t l = strlen(keyword); + while (*s != 0) { + if (native_strncasecmp(s, keyword, l) == 0) { + s += strlen(keyword); + while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') { + s++; + } + if (*s == 0) { + return 0; + } + return s; + } + s++; + } + return 0; +} + +// Utility method to parse an identifier: id, `id`, x.y or `x`.`y`. +// Returns 0 if we reach the end of string. +// STATIC +const char* SparrowHandler::getIdentifier(const char* s, Str& identifier, bool& hasDot) { + bool hasQuote = false; + if (*s == '`' || *s == '"') { + hasQuote = true; + s++; + } + const char* start = s; + while (*s != 0 && (isalnum(*s) ||*s == '_')) { + s++; + } + if (*s == 0) { + return 0; + } + identifier = Str(start, static_cast(s - start)); + if (hasQuote) { + if (*s != '`' && *s != '"') { + return 0; + } + s++; + } + if (*s == '.') { + hasDot = true; + s++; + if (*s == 0) { + return 0; + } + } else { + hasDot = false; + } + return s; +} + +// Gets create info for foreign keys. This method is called when e.g. calling +// DatabaseMetaData.getImportedKeys() on the JDBC side. +// The resulting string looks like this: +// "CONSTRAINT `` FOREIGN KEY (``) REFERENCES `` (``) ON DELETE CASCADE". +// Multiple constraints are separated by commas. +/*char* SparrowHandler::get_foreign_key_create_info() { + const Master& master = share_->getMaster(); + const ForeignKeys& foreignKeys = master.getForeignKeys(); + size_t length = 65536; + char* result = 0; + for ( ; ; ) { + if (result != 0) { + my_free(result); + length *= 2; + } + result = static_cast(my_malloc(length, MYF(0))); + result[0] = '\0'; + char* s = result; + bool ok = true; + for (uint32_t i = 0; i < foreignKeys.length(); ++i) { + const ForeignKey& foreignKey = foreignKeys[i]; + const char* databaseName = foreignKey.getDatabaseName().length() == 0 + ? master.getDatabase().c_str() : foreignKey.getDatabaseName().c_str(); + const char* sparrowColumnName = table->field[foreignKey.getColumnId()]->field_name; + int l = snprintf(s, length, ",\n CONSTRAINT `%s` FOREIGN KEY (`%s`) REFERENCES `%s`.`%s` (`%s`) ON DELETE RESTRICT ON UPDATE RESTRICT", + foreignKey.getName().c_str(), sparrowColumnName, databaseName, + foreignKey.getTableName().c_str(), foreignKey.getColumnName().c_str()); + if (l < 0) { + ok = false; + break; + } + s += l; + length -= l; + } + if (ok) { + break; + } + } + return result; +} + +void SparrowHandler::free_foreign_key_create_info(char* str) { + my_free(str); +} + +// Gets the list of foreign keys. This method is called when e.g. issuing a SQL statement +// like "SELECT * FROM information_schema.KEY_COLUMN_USAGE". 
+int SparrowHandler::get_foreign_key_list(THD* thd, List* f_key_list) { + const Master& master = share_->getMaster(); + const ForeignKeys& foreignKeys = master.getForeignKeys(); + for (uint32_t i = 0; i < foreignKeys.length(); ++i) { + const ForeignKey& foreignKey = foreignKeys[i]; + FOREIGN_KEY_INFO info; + info.foreign_id = thd_make_lex_string(thd, 0, foreignKey.getName().c_str(), + static_cast(foreignKey.getName().length()), 1); + info.referenced_db = thd_make_lex_string(thd, 0, foreignKey.getDatabaseName().c_str(), + static_cast(foreignKey.getDatabaseName().length()), 1); + info.referenced_table = thd_make_lex_string(thd, 0, foreignKey.getTableName().c_str(), + static_cast(foreignKey.getTableName().length()), 1); + info.referenced_key_name = 0; + info.referenced_fields.push_back(thd_make_lex_string(thd, 0, foreignKey.getColumnName().c_str(), + static_cast(foreignKey.getColumnName().length()), 1)); + const char* columnName = table->field[foreignKey.getColumnId()]->field_name; + info.foreign_fields.push_back(thd_make_lex_string(thd, 0, columnName, + static_cast(strlen(columnName)), 1)); + info.delete_method = thd_make_lex_string(thd, 0, "RESTRICT", (uint)8, 1); + info.update_method = thd_make_lex_string(thd, 0, "RESTRICT", (uint)8, 1); + f_key_list->push_back(static_cast(thd_memdup(thd, &info, sizeof(info)))); + } + return 0; +}*/ + +THR_LOCK_DATA** SparrowHandler::store_lock(THD* thd, THR_LOCK_DATA** to, enum thr_lock_type lockType) { + SPARROW_ENTER("SparrowHandler::store_lock"); +#ifndef NDEBUG + const char* slock = "unknown"; + switch (lockType) { + case TL_IGNORE: slock = "TL_IGNORE"; break; + case TL_UNLOCK: slock = "TL_UNLOCK"; break; + case TL_READ_DEFAULT: slock = "TL_READ_DEFAULT"; break; + case TL_READ: slock = "TL_READ"; break; + case TL_READ_WITH_SHARED_LOCKS: slock = "TL_READ_WITH_SHARED_LOCKS"; break; + case TL_READ_HIGH_PRIORITY: slock = "TL_READ_HIGH_PRIORITY"; break; + case TL_READ_NO_INSERT: slock = "TL_READ_NO_INSERT"; break; + case TL_WRITE_CONCURRENT_DEFAULT: slock = "TL_WRITE_CONCURRENT_DEFAULT"; break; + case TL_WRITE_ALLOW_WRITE: slock = "TL_WRITE_ALLOW_WRITE"; break; + case TL_WRITE_CONCURRENT_INSERT: slock = "TL_WRITE_CONCURRENT_INSERT"; break; + //case TL_WRITE_DELAYED: slock = "TL_WRITE_DELAYED"; break; + case TL_WRITE_DEFAULT: slock = "TL_WRITE_DEFAULT"; break; + case TL_WRITE_LOW_PRIORITY: slock = "TL_WRITE_LOW_PRIORITY"; break; + case TL_WRITE: slock = "TL_WRITE"; break; + case TL_WRITE_ONLY: slock = "TL_WRITE_ONLY"; break; + } + DBUG_PRINT("sparrow_handler", ("Table %s.%s, lock type %s", share_->getDatabaseName().c_str(), share_->getTableName().c_str(), slock)); +#endif + if (lockType != TL_IGNORE && lockData_.type == TL_UNLOCK) { + lockData_.type= lockType; + } + *to++ = &lockData_; + return to; +} + +int SparrowHandler::external_lock(THD* thd, int lockType) { + SPARROW_ENTER("SparrowHandler::external_lock"); +#ifndef NDEBUG + if (share_ != 0) { + const char* slock = "unknown"; + switch (lockType) { + case F_UNLCK: slock = "F_UNLCK"; break; + case F_RDLCK: slock = "F_RDLCK"; break; + case F_WRLCK: slock = "F_WRLCK"; break; + } + DBUG_PRINT("sparrow_handler", ("Table %s.%s, lock type %s", share_->getDatabaseName().c_str(), share_->getTableName().c_str(), slock)); + } +#endif + if (lockType == F_WRLCK) { + context_.writeLock(); + } else if (lockType == F_UNLCK) { + context_.unlock(); + } + return 0; +} + +void SparrowHandler::column_bitmaps_signal() { + SPARROW_ENTER("SparrowHandler::column_bitmaps_signal"); + context_.setActiveIndex(getActiveIndex()); +} 
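+// The read path below follows the standard MySQL handler protocol. Roughly,
+// the server drives these methods as sketched here (simplified; error
+// handling, locking and the ha_* wrapper layer are omitted):
+//
+//   Full table scan:
+//     rnd_init(true);
+//     while (rnd_next(buf) == 0) {
+//       ...                        // position(buf) may save a row reference
+//     }
+//     rnd_end();                   // rnd_pos(buf, ref) re-reads a saved row
+//
+//   Index read (point lookup or range):
+//     index_init(keynr, sorted);
+//     index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT);  // or index_first()
+//     while (index_next(buf) == 0) { ... }
+//     index_end();
+//
+// Each call returns 0 on success and HA_ERR_END_OF_FILE once the cursor is
+// exhausted; the cursor state itself lives in context_, so the handler object
+// stays largely stateless between calls.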
+ +int SparrowHandler::rnd_init(bool scan) { + SPARROW_ENTER("SparrowHandler::rnd_init"); + DBUG_PRINT("sparrow_handler", ("Table %s.%s", share_->getDatabaseName().c_str(), share_->getTableName().c_str())); + context_.resetPosition(); + return 0; +} + +int SparrowHandler::rnd_next(uchar* buf) { + SPARROW_ENTER("SparrowHandler::rnd_next"); + ha_statistic_increment(&System_status_var::ha_read_rnd_next_count); + const bool result = context_.moveNext(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::rnd_pos(uchar* buf, uchar* pos) { + SPARROW_ENTER("SparrowHandler::rnd_pos"); + ha_statistic_increment(&System_status_var::ha_read_rnd_count); + const bool result = context_.moveAbsolute(my_get_ptr(pos, sizeof(uint64_t)), buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::rnd_end() { + SPARROW_ENTER("SparrowHandler::rnd_end"); + DBUG_PRINT("sparrow_handler", ("Table %s.%s", share_->getDatabaseName().c_str(), share_->getTableName().c_str())); + context_.resetPosition(); + return 0; +} + +void SparrowHandler::position(const uchar* record) { + SPARROW_ENTER("SparrowHandler::position"); + const uint64_t recordPosition = context_.savePosition(); + my_store_ptr(ref, sizeof(uint64_t), recordPosition); +} + +int SparrowHandler::index_init(uint idx, bool sorted) { + SPARROW_ENTER("SparrowHandler::index_init"); + setActiveIndex(idx); + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex())); + context_.setActiveIndex(idx); + return 0; +} + +int SparrowHandler::index_next(uchar* buf) { + SPARROW_ENTER("SparrowHandler::index_next"); + ha_statistic_increment(&System_status_var::ha_read_next_count); + bool result = context_.findNextRecord(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::index_prev(uchar* buf) { + SPARROW_ENTER("SparrowHandler::index_prev"); + ha_statistic_increment(&System_status_var::ha_read_prev_count); + bool result = context_.findPreviousRecord(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::index_first(uchar* buf) { + SPARROW_ENTER("SparrowHandler::index_first"); +#ifndef NDEBUG + const uint64_t start = my_micro_time(); +#endif + ha_statistic_increment(&System_status_var::ha_read_first_count); + bool result = context_.findFirstRecord(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u, duration %llums", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex(), static_cast((my_micro_time() - start) / 1000))); + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::index_last(uchar* buf) { + SPARROW_ENTER("SparrowHandler::index_last"); +#ifndef NDEBUG + const uint64_t start = my_micro_time(); +#endif + ha_statistic_increment(&System_status_var::ha_read_last_count); + bool result = context_.findLastRecord(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 
0 : STATUS_NOT_FOUND; + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u, duration %llums", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex(), static_cast((my_micro_time() - start) / 1000))); + return result ? 0 : HA_ERR_END_OF_FILE; +} + +int SparrowHandler::index_read_map(uchar* buf, const uchar* key, + key_part_map keyPartMap, enum ha_rkey_function findFlag) { + SPARROW_ENTER("SparrowHandler::index_read_map"); +#ifndef NDEBUG + const uint64_t start = my_micro_time(); +#endif + ha_statistic_increment(&System_status_var::ha_read_key_count); + if (key == 0) { + const bool result = context_.findFirstRecord(buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u, duration %llums", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex(), static_cast((my_micro_time() - start) / 1000))); + return result ? 0 : HA_ERR_END_OF_FILE; + } else { + const bool result = context_.findRecord(KeyValue(const_cast(key), keyPartMap), findFlag, buf); + table->set_row_status_from_handler(!result); + //table->status = result ? 0 : STATUS_NOT_FOUND; + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u, duration %llums", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex(), static_cast((my_micro_time() - start) / 1000))); + return result ? 0 : HA_ERR_END_OF_FILE; + } +} + +int SparrowHandler::index_read_idx_map(uchar* buf, uint index, const uchar* key, + key_part_map keyPartMap, enum ha_rkey_function findFlag) { + SPARROW_ENTER("SparrowHandler::index_read_idx_map"); + index_init(index, true); + return index_read_map(buf, key, keyPartMap, findFlag); +} + +int SparrowHandler::index_read_last_map(uchar* buf, const uchar* key, key_part_map keyPartMap) { + SPARROW_ENTER("SparrowHandler::index_read_last_map"); + return index_read_map(buf, key, keyPartMap, HA_READ_PREFIX_LAST); +} + +int SparrowHandler::index_end() { + SPARROW_ENTER("SparrowHandler::index_end"); + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), getActiveIndex())); + setActiveIndex(MAX_KEY); + context_.resetPosition(); + return 0; +} + +double SparrowHandler::scan_time() { + SPARROW_ENTER("SparrowHandler::scan_time"); + return ulonglong2double(getStats().data_file_length) / IO_SIZE + 2; +} + +double SparrowHandler::read_time(uint index, uint ranges, ha_rows rows) { + SPARROW_ENTER("SparrowHandler::read_time"); + return rows2double(ranges + rows); +} + +ha_rows SparrowHandler::records_in_range(uint inx, key_range* minKey, key_range* maxKey) { + SPARROW_ENTER("SparrowHandler::records_in_range"); +#ifndef NDEBUG + const uint64_t start = my_micro_time(); +#endif + const uint64_t records = context_.recordsInRange(inx, minKey, maxKey); +#ifndef NDEBUG + if (records == HA_POS_ERROR) { + DBUG_PRINT("sparrow_handler", ("Table %s.%s: cannot use index %u for now", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), inx)); + } else { + DBUG_PRINT("sparrow_handler", ("Table %s.%s, index %u; found %llu records in range in %ums", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), inx, static_cast(records), static_cast((my_micro_time() - start) / 1000))); + } +#endif + return static_cast(records); +} + +ha_rows SparrowHandler::estimate_rows_upper_bound() { + SPARROW_ENTER("SparrowHandler::estimate_rows_upper_bound"); + const uint64_t records = 
context_.recordsTotal(); +#ifndef NDEBUG + DBUG_PRINT("sparrow_handler", ("Table %s.%s, total recordss %llu", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), static_cast(records))); +#endif + return static_cast(records); +} + +void SparrowHandler::start_bulk_insert(ha_rows rows) { + SPARROW_ENTER("SparrowHandler::start_bulk_insert"); + context_.startInsert(static_cast(rows)); +} + +int SparrowHandler::write_row(uchar* buf) { + SPARROW_ENTER("SparrowHandler::write_row"); + ha_statistic_increment(&System_status_var::ha_write_count); + // Handled by MySQL core: http://dev.mysql.com/doc/refman/5.6/en/timestamp-initialization.html + //if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) { + // table->timestamp_field->set_time(); + //} + if (table->next_number_field && buf == table->record[0]) { + const int error = update_auto_increment(); + if (error) { + return error; + } + } + const bool result = context_.insertRecord(buf); + return result ? 0 : HA_ERR_INTERNAL_ERROR; +} + +int SparrowHandler::end_bulk_insert() { + SPARROW_ENTER("SparrowHandler::end_bulk_insert"); + const bool result = context_.endInsert(); + return result ? 0 : HA_ERR_INTERNAL_ERROR; +} + +int SparrowHandler::update_row(const uchar* old_data, uchar* new_data) { + SPARROW_ENTER("SparrowHandler::update_row"); + ha_statistic_increment(&System_status_var::ha_update_count); + //if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { + // table->timestamp_field->set_time(); + //} + const bool result = context_.updateRecord(new_data); + return result ? 0 : HA_ERR_INTERNAL_ERROR; +} + +int SparrowHandler::delete_row(const uchar* buf) { + SPARROW_ENTER("SparrowHandler::delete_row"); + ha_statistic_increment(&System_status_var::ha_delete_count); + return HA_ERR_WRONG_COMMAND; +} + +int SparrowHandler::delete_all_rows() { + SPARROW_ENTER("SparrowHandler::delete_all_rows"); + return HA_ERR_WRONG_COMMAND; +} + +void SparrowHandler::unlock_row() { + SPARROW_ENTER("SparrowHandler::unlock_row"); +} + +bool SparrowHandler::get_error_message(int error, String* buf) { + SPARROW_ENTER("SparrowHandler::get_error_message"); + // TODO retrieve last exception message + return false; +} + +/* Request storage engine to do an extra operation: enable,disable or run some functionality. + See mysql\include\my_base.h:184 for details. + */ +int SparrowHandler::extra(enum ha_extra_function operation) { + SPARROW_ENTER("SparrowHandler::extra"); + return 0; +} + +int SparrowHandler::reset() { + SPARROW_ENTER("SparrowHandler::reset"); + DBUG_PRINT("sparrow_handler", ("Reset context for table %s.%s", + share_->getDatabaseName().c_str(), share_->getTableName().c_str())); + context_.reset(); + return 0; +} + +int SparrowHandler::records(ha_rows *num_rows) { + SPARROW_ENTER("SparrowHandler::records"); + *num_rows = getStats().records; +#ifndef NDEBUG + DBUG_PRINT("sparrow_handler", ("Number of records in table %s.%s: %llu", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), static_cast(*num_rows))); +#endif + return 0; +} + +uint SparrowHandler::max_supported_record_length() const { + SPARROW_ENTER("SparrowHandler::max_supported_record_length"); + return HA_MAX_REC_LENGTH; // Actually, that's the deafult value. +} + +uint SparrowHandler::max_supported_keys() const { + SPARROW_ENTER("SparrowHandler::max_supported_keys"); + return MAX_KEY; // Actually, that's the deafult value. 
+} + +uint SparrowHandler::max_supported_key_parts() const { + SPARROW_ENTER("SparrowHandler::max_supported_key_parts"); + return MAX_REF_PARTS; // Actually, that's the deafult value. +} + +uint SparrowHandler::max_supported_key_length() const { + SPARROW_ENTER("SparrowHandler::max_supported_key_length"); + return MAX_KEY_LENGTH; // Actually, that's the deafult value. +} + +uint SparrowHandler::max_supported_key_part_length(HA_CREATE_INFO *create_info) const { + SPARROW_ENTER("SparrowHandler::max_supported_key_part_length"); + return MAX_KEY_LENGTH; // Actually, that's the deafult value. +} + +bool SparrowHandler::is_crashed() const { + SPARROW_ENTER("SparrowHandler::is_crashed"); + return false; +} + +bool SparrowHandler::auto_repair() const { + SPARROW_ENTER("SparrowHandler::auto_repair"); + return false; +} + +void SparrowHandler::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, ulonglong* first_value, + ulonglong* nb_reserved_values) { + SPARROW_ENTER("SparrowHandler::get_auto_increment"); + SparrowHandler::info(HA_STATUS_AUTO); + *first_value = stats.auto_increment_value; + *nb_reserved_values = ULLONG_MAX; +} + +/* +int SparrowHandler::reset_auto_increment(ulonglong value) { + SPARROW_ENTER("SparrowHandler::reset_auto_increment"); + DBUG_PRINT("sparrow_handler", ("Reset auto increment on table %s.%s to %llu", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), static_cast(value))); + share_->getMaster().setAutoInc(static_cast(value)); + return 0; +}*/ + +bool SparrowHandler::check_if_incompatible_data(HA_CREATE_INFO* info, uint table_changes) { + SPARROW_ENTER("SparrowHandler::check_if_incompatible_data"); + return table_changes == IS_EQUAL_YES ? COMPATIBLE_DATA_YES : COMPATIBLE_DATA_NO; +} + +enum_alter_inplace_result SparrowHandler::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info) { + SPARROW_ENTER("SparrowHandler::check_if_supported_inplace_alter"); + + // Check the alterations are compatible with Sparrow + if ( check_alterations( ha_alter_info->alter_info ) != 0 ) { + return HA_ALTER_ERROR; + } + + // Operations for altering a table that Sparrow does not care about + Alter_inplace_info::HA_ALTER_FLAGS inplace_ignore_operations= + Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT | + Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE; + + // Column alterations in Sparrow can be performed without table copy. + Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations= + Alter_inplace_info::ADD_INDEX | + Alter_inplace_info::DROP_INDEX | + Alter_inplace_info::ADD_UNIQUE_INDEX | + Alter_inplace_info::DROP_UNIQUE_INDEX | + Alter_inplace_info::ADD_PK_INDEX | + Alter_inplace_info::DROP_PK_INDEX | + Alter_inplace_info::ADD_COLUMN | + Alter_inplace_info::DROP_COLUMN | + Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER | + Alter_inplace_info::ALTER_STORED_COLUMN_ORDER | + Alter_inplace_info::ALTER_COLUMN_DEFAULT | + Alter_inplace_info::ADD_FOREIGN_KEY | + Alter_inplace_info::DROP_FOREIGN_KEY | + Alter_inplace_info::ALTER_VIRTUAL_COLUMN_TYPE | + Alter_inplace_info::ALTER_STORED_COLUMN_TYPE | + Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH | + Alter_inplace_info::ALTER_COLUMN_NAME | + Alter_inplace_info::ALTER_COLUMN_DEFAULT | + Alter_inplace_info::CHANGE_CREATE_OPTION; + + /* Is there at least one operation that requires copy algorithm? 
*/ + if (ha_alter_info->handler_flags & ~(inplace_offline_operations | inplace_ignore_operations)) + return HA_ALTER_INPLACE_NOT_SUPPORTED; + + /*if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + HA_CREATE_INFO *create_info= ha_alter_info->create_info; + + // TODO: Sparrow - allow changing character set/collation from utf8 to utf8_bin without table copy. + bool inplace = false; + if ( create_info->used_fields & (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET) + && table->s->table_charset == &my_charset_utf8mb4_general_ci + && (create_info->table_charset == &my_charset_utf8mb4_bin || create_info->default_table_charset == &my_charset_utf8mb4_bin) ) { + inplace = true; + } + + if ( !inplace) + return HA_ALTER_INPLACE_NOT_SUPPORTED; + }*/ + + if (ha_alter_info->handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) { + // TODO: Sparrow: changing the length of a VARCHAR or VARBINARY field is allowed. + /*if (tmp != IS_EQUAL_YES + && tmp_new_field->sql_type == field->real_type() + && field->real_type() == MYSQL_TYPE_VARCHAR + && tmp_new_field->length != field->max_display_length()) { + tmp = IS_EQUAL_YES; + }*/ + } + + return HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE; +} + +//bool SparrowHandler::prepare_inplace_alter_table( TABLE *altered_table, Alter_inplace_info *ha_alter_info ) +bool SparrowHandler::prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, const dd::Table *old_table_def, dd::Table *new_table_def) +{ + SPARROW_ENTER("SparrowHandler::prepare_inplace_alter_table"); + return false; +} + +//bool SparrowHandler::inplace_alter_table( TABLE *altered_table, Alter_inplace_info *ha_alter_info ) +bool SparrowHandler::inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, const dd::Table *old_table_def, dd::Table *new_table_def) +{ + SPARROW_ENTER("SparrowHandler::prepare_inplace_alter_table"); + DBUG_PRINT("sparrow_handler", ("Start altering table %s.%s", share_->getDatabaseName().c_str(), share_->getTableName().c_str())); + + if ( ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION ) { + update_create_info( ha_alter_info->create_info ); + } + + if ( ha_alter_info->handler_flags & (Alter_inplace_info::DROP_INDEX | Alter_inplace_info::DROP_UNIQUE_INDEX | Alter_inplace_info::DROP_PK_INDEX) ) { + drop_index( table, ha_alter_info->index_drop_buffer, ha_alter_info->index_drop_count ); + } + + if ( ha_alter_info->handler_flags & (Alter_inplace_info::ADD_INDEX | Alter_inplace_info::ADD_UNIQUE_INDEX | Alter_inplace_info::ADD_PK_INDEX) ) { + add_index( altered_table, ha_alter_info->key_info_buffer, ha_alter_info->index_add_buffer, ha_alter_info->index_add_count ); + } + + return false; +} + +//bool SparrowHandler::commit_inplace_alter_table( TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit ) +bool SparrowHandler::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit, const dd::Table *old_table_def, dd::Table *new_table_def) +{ + SPARROW_ENTER("SparrowHandler::commit_inplace_alter_table"); + if ( commit == false && context_.getAltered() == true ) { + spw_print_error("Sparrow: Cannot rollback alterations already made on table %s.%s", + share_->getDatabaseName().c_str(), share_->getTableName().c_str()); + } + return false; +} + +//void SparrowHandler::notify_table_changed() +void SparrowHandler::notify_table_changed(Alter_inplace_info *ha_alter_info) +{ + SPARROW_ENTER("SparrowHandler::notify_table_changed"); +} + +int 
SparrowHandler::add_index(TABLE* altered_table, KEY* key_info_buffer, uint* index_add_buffer, uint nb) { + SPARROW_ENTER("SparrowHandler::add_index"); + Master& master = share_->getMaster(); + KEY* key_info = NULL; + try { + WriteGuard guard(master.getLock()); + Indexes indexes = master.getIndexes(); + IndexMappings mappings = master.getIndexMappings(); + Alterations alterations = master.getIndexAlterations(); + uint32_t serial = master.getIndexAlterSerial(); + + for ( uint i=0; igetDatabaseName().c_str(), share_->getTableName().c_str())); + const uint indexColumns = key_info->user_defined_key_parts; + ColumnIds columnIds(indexColumns); + for (uint j = 0; j < indexColumns; ++j) { + const Field* field = key_info->key_part[j].field; + uint32_t colId = UINT_MAX; + if ( field != NULL ) { + colId = static_cast(master.getColumn(Str(field->field_name))); + } else { + int colPos = static_cast(key_info->key_part[j].fieldnr); + colId = master.getColumn(colPos); + } + columnIds.append(colId); + } + const bool unique = (key_info->flags & HA_NOSAME) != 0; + const Index newIndex(key_info->name, columnIds, unique); + + // Append new index. + const uint32_t offset = indexes.length(); + indexes.append(newIndex); + const uint32_t mysqlIndexId = index_add_buffer[i]; + while (mappings.length() <= mysqlIndexId) { + mappings.append(-1); + } + mappings[mysqlIndexId] = static_cast(offset); + alterations.append(Alteration(ALT_ADD_INDEX, ++serial, offset)); + } + assert(!alterations.isEmpty()); + master.setIndexes(indexes); + master.setIndexMappings(mappings); + master.setIndexAlterSerial(serial); + master.setIndexAlterations(alterations); + master.toDisk(); + context_.setAltered( true ); + } catch(const SparrowException& e) { + if ( key_info != NULL ) { + spw_print_error("Sparrow: Cannot add index %s on table %s.%s: %s", print_KEY(*key_info).c_str(), + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), e.getText()); + } else { + spw_print_error("Sparrow: Cannot add index(es) on table %s.%s: %s", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), e.getText()); + } + return -1; + } + master.startIndexAlter(false); + return 0; +} + + +int SparrowHandler::drop_index(TABLE* table, KEY** key_info_buffer, uint nb) { + SPARROW_ENTER("SparrowHandler::drop_index"); + assert(nb != 0); + Master& master = share_->getMaster(); + KEY* key_info = NULL; + try { + WriteGuard guard(master.getLock()); + Indexes indexes = master.getIndexes(); + IndexMappings mappings = master.getIndexMappings(); + Alterations alterations = master.getIndexAlterations(); + uint32_t serial = master.getIndexAlterSerial(); + + for ( uint i=0; igetDatabaseName().c_str(), share_->getTableName().c_str())); + // Search for this index's position in mysql internal data array + const uint nbIndexes = table->s->keys; + uint j = 0; + for (; j < nbIndexes; ++j) { + const KEY& index = table->key_info[j]; + if ( strcmp( key_info->name, index.name ) == 0 ) { + const uint32_t mysqlIndexId = j; + const int offset = mappings[mysqlIndexId]; + mappings[mysqlIndexId] = -1; + indexes[offset].drop(); + alterations.append(Alteration(ALT_DROP_INDEX, ++serial, offset)); + break; + } + } + assert( j < nbIndexes ); + } + assert(!alterations.isEmpty()); + master.setIndexes(indexes); + master.setIndexMappings(mappings); + master.setIndexAlterSerial(serial); + master.setIndexAlterations(alterations); + master.toDisk(); + context_.setAltered( true ); + } catch(const SparrowException& e) { + if ( key_info!= NULL ) { + spw_print_error("Sparrow: Cannot 
drop index %s from table %s.%s: %s", print_KEY(*key_info).c_str(), + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), e.getText()); + } else { + spw_print_error("Sparrow: Cannot drop index(es) from table %s.%s: %s", + share_->getDatabaseName().c_str(), share_->getTableName().c_str(), e.getText()); + } + return -1; + } + master.startIndexAlter(false); + return 0; +} + +void SparrowHandler::update_create_info(HA_CREATE_INFO* create_info) { + SPARROW_ENTER("SparrowHandler::update_create_info"); + table->file->info(HA_STATUS_AUTO); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { + create_info->auto_increment_value = stats.auto_increment_value; + } +} + +// Methods for information schema tables. + +// Timestamps fields are displayed in seconds. So the decimals are set to 0. +static ST_FIELD_INFO sparrow_tables_field_info[] = { + // Name, length, type, value, maybe_null, old_name, open_method. + {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"DATA_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"INDEX_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"PERIOD", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"PARTITIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"FILES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"PERSISTENT_RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"TRANSIENT_RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"OLDEST", 0, MYSQL_TYPE_TIMESTAMP, 0, true, 0, 0 }, + {"NEWEST", 0, MYSQL_TYPE_TIMESTAMP, 0, true, 0, 0 }, + {"MAX_LIFETIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"DEFAULT_WHERE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"STRING_OPTIMIZATION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"COALESCING_PERIOD", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"COALESCING_PERCENTAGE", 5, MYSQL_TYPE_DOUBLE, 0, true, 0, 0 }, + {"AGE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} +}; + +// STATIC +int SparrowHandler::initializeISTables(void* p) { + SPARROW_ENTER("SparrowHandler::initializeISTables"); + ST_SCHEMA_TABLE* schema = static_cast(p); + schema->fields_info = sparrow_tables_field_info; + schema->fill_table = SparrowHandler::fillISTables; + return 0; +} + +// STATIC +int SparrowHandler::fillISTables(THD* thd, Table_ref* tables, [[maybe_unused]] Item* cond) { + SPARROW_ENTER("SparrowHandler::fillISTables"); + CHARSET_INFO* scs = system_charset_info; + TABLE* table = static_cast(tables->table); + SortedMasters masters = InternalApi::getAll(); + char tmp[1024]; + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard masterGuard(master.getLock()); + int f = 0; + const Str& sdatabase = master.getDatabase(); + table->field[f++]->store(sdatabase.c_str(), static_cast(sdatabase.length()), scs); + const Str& stable = master.getTable(); + table->field[f++]->store(stable.c_str(), static_cast(stable.length()), scs); + table->field[f++]->store(static_cast(master.getDataSize()), false); + table->field[f++]->store(static_cast(master.getIndexSize()), false); + table->field[f++]->store(static_cast(master.getAggregationPeriod()), false); + const 
uint32_t partitions = master.getPartitions().length(); + table->field[f++]->store(static_cast(partitions), false); + table->field[f++]->store(static_cast(1 + master.getIndexMappings().length()), false); + table->field[f++]->store(static_cast(master.getRecords()), false); + table->field[f++]->store(static_cast(master.getTransientRecords()), false); + const uint64_t oldest = master.getOldest() / 1000; + if (oldest == 0) { + table->field[f++]->set_null(); + } else { + table->field[f]->set_notnull(); + my_timeval tm; + tm.m_tv_sec = oldest; + tm.m_tv_usec = 0; + static_cast(table->field[f++])->store_timestamp(&tm); + } + const uint64_t newest = master.getNewest() / 1000; + if (newest == 0) { + table->field[f++]->set_null(); + } else { + table->field[f]->set_notnull(); + my_timeval tm; + tm.m_tv_sec = newest; + tm.m_tv_usec = 0; + static_cast(table->field[f++])->store_timestamp(&tm); + } + table->field[f++]->store(static_cast(master.getMaxLifetime() / 1000), false); + table->field[f++]->store(static_cast(master.getDefaultWhere() / 1000), false); + table->field[f++]->store(static_cast(master.getStringOptimization()), false); + table->field[f++]->store(static_cast(master.getCoalescingPeriod() / 1000), false); + const double coalescingPercentage = master.getCoalescingPercentage(); + if (coalescingPercentage < 0) { + table->field[f++]->set_null(); + } else { + table->field[f]->set_notnull(); + snprintf(tmp, sizeof(tmp), "%.1f", coalescingPercentage); + table->field[f++]->store(tmp, static_cast(strlen(tmp)), scs); + } + table->field[f++]->store(static_cast(master.getAge() / 1000), false); + schema_table_store_record(thd, table); + } + return 0; +} + +// STATIC +int SparrowHandler::deinitializeISTables(void* p) { + SPARROW_ENTER("SparrowHandler::deinitializeISTables"); + return 0; +} + +static ST_FIELD_INFO sparrow_columns_field_info[] = { + // Name, length, type, value, maybe_null, old_name, open_method. 
+ {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"COLUMN_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"INTERNAL_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"IS_DROPPED", 3, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"IS_IP", 3, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"IP_LOOKUP", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, true, 0, 0 }, + {"IS_DNS_IDENTIFIER", 3, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"DROP_SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} +}; + +// STATIC +int SparrowHandler::initializeISColumns(void* p) { + SPARROW_ENTER("SparrowHandler::initializeISColumns"); + ST_SCHEMA_TABLE* schema = static_cast(p); + schema->fields_info = sparrow_columns_field_info; + schema->fill_table = SparrowHandler::fillISColumns; + return 0; +} + +// STATIC +int SparrowHandler::fillISColumns(THD* thd, Table_ref* tables, [[maybe_unused]] Item* cond) { + SPARROW_ENTER("SparrowHandler::fillISColumns"); + CHARSET_INFO* scs = system_charset_info; + TABLE* table = static_cast(tables->table); + SortedMasters masters = InternalApi::getAll(); + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard masterGuard(master.getLock()); + const Columns& columns = master.getColumns(); + for (uint32_t j = 0; j < columns.length(); ++j) { + int f = 0; + const Str& sdatabase = master.getDatabase(); + table->field[f++]->store(sdatabase.c_str(), static_cast(sdatabase.length()), scs); + const Str& stable = master.getTable(); + table->field[f++]->store(stable.c_str(), static_cast(stable.length()), scs); + const Column& column = columns[j]; + const Str& scolumn = column.getName(); + table->field[f++]->store(scolumn.c_str(), static_cast(scolumn.length()), scs); + table->field[f++]->store(static_cast(j), false); + const char* s = column.isDropped() ? "YES" : "NO"; + table->field[f++]->store(s, static_cast(strlen(s)), scs); + s = column.isFlagSet(COL_IP_ADDRESS) ? "YES" : "NO"; + table->field[f++]->store(s, static_cast(strlen(s)), scs); + if (column.isFlagSet(COL_IP_LOOKUP)) { + table->field[f]->set_notnull(); + s = columns[column.getInfo()].getName().c_str(); + table->field[f++]->store(s, static_cast(strlen(s)), scs); + } else { + table->field[f++]->set_null(); + } + s = column.isFlagSet(COL_DNS_IDENTIFIER) ? "YES" : "NO"; + table->field[f++]->store(s, static_cast(strlen(s)), scs); + table->field[f++]->store(static_cast(column.getSerial()), false); + table->field[f++]->store(static_cast(column.getDropSerial()), false); + schema_table_store_record(thd, table); + } + } + return 0; +} + +// STATIC +int SparrowHandler::deinitializeISColumns(void* p) { + SPARROW_ENTER("SparrowHandler::deinitializeISColumns"); + return 0; +} + +static ST_FIELD_INFO sparrow_indexes_field_info[] = { + // Name, length, type, value, maybe_null, old_name, open_method. 
+ {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"INDEX_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"UNIQUE", 3, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"COLUMNS", 1024, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"FILE_SUFFIX", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} +}; + +// STATIC +int SparrowHandler::initializeISIndexes(void* p) { + SPARROW_ENTER("SparrowHandler::initializeISIndexes"); + ST_SCHEMA_TABLE* schema = static_cast(p); + schema->fields_info = sparrow_indexes_field_info; + schema->fill_table = SparrowHandler::fillISIndexes; + return 0; +} + +// STATIC +int SparrowHandler::fillISIndexes(THD* thd, Table_ref* tables, [[maybe_unused]] Item* cond) { + SPARROW_ENTER("SparrowHandler::fillISIndexes"); + CHARSET_INFO* scs = system_charset_info; + TABLE* table = static_cast(tables->table); + SortedMasters masters = InternalApi::getAll(); + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard masterGuard(master.getLock()); + const Indexes& indexes = master.getIndexes(); + const Columns& columns = master.getColumns(); + for (uint32_t j = 0; j < indexes.length(); ++j) { + const Index& index = indexes[j]; + if (index.isDropped()) { + continue; + } + int f = 0; + const Str& sdatabase = master.getDatabase(); + table->field[f++]->store(sdatabase.c_str(), static_cast(sdatabase.length()), scs); + const Str& stable = master.getTable(); + table->field[f++]->store(stable.c_str(), static_cast(stable.length()), scs); + const Str& sindex = index.getName(); + table->field[f++]->store(sindex.c_str(), static_cast(sindex.length()), scs); + const char* s = index.isUnique() ? "YES" : "NO"; + table->field[f++]->store(s, static_cast(strlen(s)), scs); + Str scolumns; + const ColumnIds& columnIds = index.getColumnIds(); + for (uint32_t k = 0; k < columnIds.length(); ++k) { + if (k > 0) { + scolumns += Str(", "); + } + scolumns += columns[columnIds[k]].getName(); + } + table->field[f++]->store(scolumns.c_str(), static_cast(scolumns.length()), scs); + char buffer[128]; + snprintf(buffer, sizeof(buffer), "_%02u.spi", j); + table->field[f++]->store(buffer, static_cast(strlen(buffer)), scs); + schema_table_store_record(thd, table); + } + } + return 0; +} + +// STATIC +int SparrowHandler::deinitializeISIndexes(void* p) { + SPARROW_ENTER("SparrowHandler::deinitializeISIndexes"); + return 0; +} + +static ST_FIELD_INFO sparrow_alterations_field_info[] = { + // Name, length, type, value, maybe_null, old_name, open_method. 
+ {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"INFO", 1024, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"PERCENTAGE", 5, MYSQL_TYPE_DOUBLE, 0, false, 0, 0 }, + {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} +}; + +// STATIC +int SparrowHandler::initializeISAlterations(void* p) { + SPARROW_ENTER("SparrowHandler::initializeISAlterations"); + ST_SCHEMA_TABLE* schema = static_cast(p); + schema->fields_info = sparrow_alterations_field_info; + schema->fill_table = SparrowHandler::fillISAlterations; + return 0; +} + +// STATIC +int SparrowHandler::fillISAlterations(THD* thd, Table_ref* tables, [[maybe_unused]] Item* cond) { + SPARROW_ENTER("SparrowHandler::fillISAlterations"); + CHARSET_INFO* scs = system_charset_info; + TABLE* table = static_cast(tables->table); + SortedMasters masters = InternalApi::getAll(); + char tmp[1024]; + for (uint32_t i = 0; i < masters.length(); ++i) { + uint64_t elapsed; + uint64_t left; + double percentage; + const Master& master = *masters[i]; + ReadGuard guard(master.getLock()); + if (master.getIndexAlterStatus(elapsed, left, percentage)) { + const Alterations& alterations = master.getIndexAlterations(); + for (uint32_t j = 0; j < alterations.length(); ++j) { + int f = 0; + const Str& sdatabase = master.getDatabase(); + table->field[f++]->store(sdatabase.c_str(), static_cast(sdatabase.length()), scs); + const Str& stable = master.getTable(); + table->field[f++]->store(stable.c_str(), static_cast(stable.length()), scs); + const Str info = alterations[j].getDescription(master); + table->field[f++]->store(info.c_str(), static_cast(info.length()), scs); + snprintf(tmp, sizeof(tmp), "%.1f", percentage); + table->field[f++]->store(tmp, static_cast(strlen(tmp)), scs); + schema_table_store_record(thd, table); + } + } + } + return 0; +} + +// STATIC +int SparrowHandler::deinitializeISAlterations(void* p) { + SPARROW_ENTER("SparrowHandler::deinitializeISAlterations"); + return 0; +} + +// Timestamps fields are displayed in seconds. So the decimals are set to 0. +static ST_FIELD_INFO sparrow_partitions_field_info[] = { + // Name, length, type, value, maybe_null, old_name, open_method. 
+ {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {"VERSION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"DATA_SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"DATA_RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"RECORD_OFFSET", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"INDEX_ALTER_SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"COLUMN_ALTER_SERIAL", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"FILESYSTEM", 1024, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + //{"START", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_TIMESTAMP, 0, false, 0, 0 }, + {"START", 0, MYSQL_TYPE_TIMESTAMP, 0, false, 0, 0 }, + //{"END", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_TIMESTAMP, 0, false, 0, 0 }, + {"END", 0, MYSQL_TYPE_TIMESTAMP, 0, false, 0, 0 }, + {"DURATION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"DATA_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"INDEX_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, false, 0, 0 }, + {"IS_READY", 3, MYSQL_TYPE_STRING, 0, false, 0, 0 }, + {0, 0, MYSQL_TYPE_NULL, 0, 0, 0, 0} +}; + +// STATIC +int SparrowHandler::initializeISPartitions(void* p) { + SPARROW_ENTER("SparrowHandler::initializeISPartitions"); + ST_SCHEMA_TABLE* schema = static_cast(p); + schema->fields_info = sparrow_partitions_field_info; + schema->fill_table = SparrowHandler::fillISPartitions; + return 0; +} + +// STATIC +int SparrowHandler::fillISPartitions(THD* thd, Table_ref* tables, [[maybe_unused]] Item* cond) { + SPARROW_ENTER("SparrowHandler::fillISPartitions"); + CHARSET_INFO* scs = system_charset_info; + TABLE* table = static_cast(tables->table); + SortedMasters masters = InternalApi::getAll(); + for (uint32_t i = 0; i < masters.length(); ++i) { + const Master& master = *masters[i]; + ReadGuard masterGuard(master.getLock()); + const Partitions& partitions = master.getPartitions(); + for (uint32_t j = 0; j < partitions.length(); ++j) { + const Partition& p = *partitions[j]; + if (p.isTransient() || p.isTemporary()) { + continue; + } + const PersistentPartition& partition = static_cast(p); + int f = 0; + const Str& sdatabase = master.getDatabase(); + table->field[f++]->store(sdatabase.c_str(), static_cast(sdatabase.length()), scs); + const Str& stable = master.getTable(); + table->field[f++]->store(stable.c_str(), static_cast(stable.length()), scs); + table->field[f++]->store(static_cast(partition.getVersion()), false); + table->field[f++]->store(static_cast(partition.getSerial()), false); + table->field[f++]->store(static_cast(partition.getDataSerial()), false); + table->field[f++]->store(static_cast(partition.getDataRecords()), false); + table->field[f++]->store(static_cast(partition.getRecordOffset()), false); + table->field[f++]->store(static_cast(partition.getIndexAlterSerial()), false); + table->field[f++]->store(static_cast(partition.getColumnAlterSerial()), false); + const char* s = FileUtil::getFilesystemPath(partition.getFilesystem()); + table->field[f++]->store(s, static_cast(strlen(s)), scs); + // For tables in the information_schema, timestamps must be rounded to the second, therefore, we round 
the min timestamp to the lower second, + // and the max timestamp to the higher second. + const uint64_t start = partition.getMin(); + my_timeval tm; + tm.m_tv_sec = start/1000; + tm.m_tv_usec = 0; + static_cast(table->field[f++])->store_timestamp(&tm); + const uint64_t end = partition.getMax(); + tm.m_tv_sec = end/1000; + if ((end%1000) != 0) tm.m_tv_sec += 1; + static_cast(table->field[f++])->store_timestamp(&tm); + table->field[f++]->store(static_cast(end - start), false); + table->field[f++]->store(static_cast(partition.getRecords()), false); + table->field[f++]->store(static_cast(partition.getDataSize()), false); + table->field[f++]->store(static_cast(partition.getIndexSize()), false); + s = partition.isReady() ? "YES" : "NO"; + table->field[f++]->store(s, static_cast(strlen(s)), scs); + schema_table_store_record(thd, table); + } + } + return 0; +} + +// STATIC +int SparrowHandler::deinitializeISPartitions(void* p) { + SPARROW_ENTER("SparrowHandler::deinitializeISPartitions"); + return 0; +} + +// STATIC +Str SparrowHandler::print_KEY(const KEY& index) { + char buffer[256]; + sprintf( buffer, "%s {", index.name ); + for ( uint i=0; ifield_name); + } else { + char buffer2[64]; + sprintf(buffer2, "%u", index.key_part[i].fieldnr); + strcat(buffer, buffer2); + } + } + strcat(buffer, "}"); + return Str(buffer, static_cast(strlen(buffer))); +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TableShare +////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This object is locked by MySQL and represents the table data. It contains a reference to the +// master file, as well as pre-built fields and record readers which can be shared between +// multiple handler threads. + +SYSpHash TableShare::hash_(1024); +Lock TableShare::lock_(true, "TableShare::lock_"); + +TableShare::TableShare(const Str& databaseName, const Str& tableName, TABLE* table) _THROW_(SparrowException) + : databaseName_(databaseName), tableName_(tableName), key_(table == 0) { + SPARROW_ENTER("TableShare::TableShare"); + if (!key_) { + thr_lock_init(&tableLock_); + master_ = InternalApi::get(databaseName.c_str(), tableName.c_str(), false, false, table->s); + bool changed = false; + PartitionIds flushedPartitions; + { + WriteGuard guard(master_->getLock()); + + // Check if there are column alterations. + if (current_thd->lex->alter_info != nullptr) + { + Alter_info& alterInfo = *current_thd->lex->alter_info; + const uint alterFlags = alterInfo.flags; + if (alterFlags & Alter_info::ALTER_ADD_COLUMN) { + List_iterator iterator(alterInfo.create_list); + Create_field* field; + while ((field = iterator++) != 0) { + Column newColumn = SparrowHandler::createColumn(*field); + master_->addColumn(field->after, newColumn); + changed = true; + } + } + if (alterFlags & Alter_info::ALTER_DROP_COLUMN) { + for (const Alter_drop *drop : alterInfo.drop_list) { + if (drop->type == Alter_drop::COLUMN) { + master_->dropColumn(drop->name); + changed = true; + } + } + } + if (alterFlags & Alter_info::ALTER_CHANGE_COLUMN) { + List_iterator iterator(alterInfo.create_list); + Create_field* field; + while ((field = iterator++) != 0) { + master_->renameColumn(field->change, field->field_name); + changed = true; + } + } + if (alterFlags & Alter_info::ALTER_ADD_INDEX || alterFlags & Alter_info::ALTER_DROP_INDEX) { + changed = true; + } + if (changed) { + master_->toDisk(); + + // Force flush to make sure transient partitions are up to date. 
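+ // The serials of the flushed transient partitions are collected so that, once the master
+ // lock is released, the constructor can wait for the flush to complete (see waitForFlush() below).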
+ //master_->forceFlushNoLock();
+ TransientPartitions transientPartitions;
+ master_->getTransientPartitions( transientPartitions );
+ master_->forceFlushNoLock( transientPartitions, true );
+
+ uint nb = transientPartitions.length();
+ if (nb != 0) {
+ flushedPartitions.resize(nb);
+ for (uint i=0; i<nb; i++) {
+ flushedPartitions.append(transientPartitions[i]->getSerial());
+ }
+ }
+
+ alterInfo.flags = 0;
+ }
+ }
+ columnAlterSerial_ = master_->getColumnAlterSerial();
+
+ // If table fields do not match master's columns, this is a temporary table opened for alteration:
+ // do not try to build fields and record readers.
+ const Columns& columns = master_->getColumns();
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < columns.length(); ++i) {
+ const Column& column = columns[i];
+ if (column.isDropped()) {
+ continue;
+ }
+ if (j == table->s->fields) {
+ return;
+ }
+ const Str name(table->field[j++]->field_name, false);
+ if (name.compareTo(column.getName(), true) != 0) {
+ return;
+ }
+ }
+ if (j < table->s->fields) {
+ return;
+ }
+
+ // Create fields.
+ const uint32_t serial = master_->getColumnAlterSerial();
+ FieldBase::createFields(serial, false, table->field, columns, fields_);
+ for (uint32_t i = 0; i < fields_.length(); ++i) {
+ FieldBase* field = fields_[i];
+ if (field != 0 && field->isMapped()) {
+ mappedFields_.append(field);
+ }
+ }
+
+ // Create record readers. The first record reader is for the SPD files, the following ones are for the
+ // index files (the nodes in the index files contain the values of the indexed columns).
+ const Indexes& indexes = master_->getIndexes();
+ const uint32_t nbIndexes = indexes.length();
+ recordWrappers_.resize(1 + nbIndexes * 2);
+ recordWrappers_.append(RecordWrapper(fields_, 0, false));
+ for (uint32_t i = 0; i < nbIndexes; ++i) {
+ const ColumnIds* columnIds = &indexes[i].getColumnIds();
+ recordWrappers_.append(RecordWrapper(fields_, columnIds, false));
+ recordWrappers_.append(RecordWrapper(fields_, columnIds, true));
+ }
+ }
+ if (changed) {
+ // Wait until flush is done.
+ master_->waitForFlush(flushedPartitions);
+ }
+ }
+}
+
+SerialRecordWrapper* TableShare::createSerialRecordWrapper(TABLE& table, const uint32_t serial, const uint32_t index, const bool tree) const {
+ TableFieldsGuard fieldsGuard;
+ TableFields& fields = fieldsGuard.get();
+ ReadGuard guard(master_->getLock());
+ FieldBase::createFields(serial, false, table.field, master_->getColumns(), fields);
+ return new SerialRecordWrapper(serial, index, tree, fields, index == DATA_FILE ?
0 : &master_->getIndexes()[index].getColumnIds()); +} + +PartSerialRecordWrapper* TableShare::createPartSerialRecordWrapper(TABLE& table, const uint32_t alterSerial, const uint32_t partSerial, const ColumnIds& skippedColumnIds) const { + TableFields fields; + ReadGuard guard(master_->getLock()); + FieldBase::createFields(alterSerial, false, table.field, master_->getColumns(), fields, &skippedColumnIds); + return new PartSerialRecordWrapper(partSerial, fields); +} + +TableShare::~TableShare() { + if (!key_) { + mappedFields_.clear(); + fields_.clearAndDestroy(); + thr_lock_delete(&tableLock_); + } +} + +// STATIC +TableShare* TableShare::acquire(const Str& databaseName, const Str& tableName, TABLE* table, THR_LOCK_DATA* lockData) _THROW_(SparrowException) { + const TableShare key(databaseName, tableName, 0); + TableShare* share = 0; + { + Guard guard(lock_); + share = hash_.find(&key); + if (share == 0) { + share = new TableShare(databaseName, tableName, table); + hash_.insert(share); + } + } + thr_lock_data_init(&share->tableLock_, lockData, 0); + share->acquireRef(); + return share; +} + +// STATIC +void TableShare::release(TableShare* share) { + if (share->releaseRef()) { + Guard guard(lock_); + hash_.remove(share); + delete share; + } +} + +} diff --git a/storage/sparrow/handler/hasparrow.h b/storage/sparrow/handler/hasparrow.h new file mode 100644 index 000000000000..5f659c75a32d --- /dev/null +++ b/storage/sparrow/handler/hasparrow.h @@ -0,0 +1,671 @@ +/* + Sparrow handler. +*/ + +#ifndef _handler_handler_h_ +#define _handler_handler_h_ + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +#include "../engine/master.h" +#include "../engine/misc.h" + +extern "C" char** thd_query(MYSQL_THD thd); + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// RecordWrapper +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define SPARROW_MAX_BIT_SIZE 64 + +typedef SYSarray BitArray; + +/* Reads a record from a partition and format the data to the MySQL format using the TableFields. + The bit mask indicates which columns to read and send back. The bit mask corresponds to + index definitions. 
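+ A wrapper built with a null column id list covers all table fields and is used for the data
+ (SPD) files; a wrapper built from an index's column ids covers only the indexed columns and is
+ used for the corresponding index file, the 'tree' flag selecting the layout used in index tree nodes.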
+*/ +class PartitionReader; +class RecordWrapper { +protected: + + TableFields fields_; + uint32_t bits_; + uint32_t bitSize_; + uint32_t size_; + +private: + void initialize(const TableFields& fields, const ColumnIds* columnIds, const bool tree) _THROW_(SparrowException); + +public: + + RecordWrapper() : bits_(0), bitSize_(0), size_(0) { + } + + RecordWrapper(const TableFields& fields, const ColumnIds* columnIds, const bool tree) _THROW_(SparrowException); + + RecordWrapper(TableFields& fields, const ColumnIds* columnIds, const bool tree, const bool removeFromFields) _THROW_(SparrowException); + + RecordWrapper(const TableFields& fields, const ColumnIds& skippedColumnIds) _THROW_(SparrowException); + + virtual ~RecordWrapper() { + } + + const TableFields& getFields() const { + return fields_; + } + + uint32_t getSize() const { + return size_; + } + + uint32_t getBitSize() const { + return bitSize_; + } + + void readBits(ByteBuffer& buffer, uint8_t* bits) const { + ByteBuffer b(bits, getBitSize()); + buffer >> b; + } + + void readUsingKeyPartMap(PartitionReader& reader, PartitionReader& stringReader, const key_part_map map, + uint8_t* buffer, const bool keyFormat) const _THROW_(SparrowException); + + void readUsingTableBitmap(TABLE& table, PartitionReader& reader, PartitionReader& stringReader, const bool all, + uint8_t* buffer, const bool keyFormat) const _THROW_(SparrowException); + + void readKeyValue(PartitionReader& reader, PartitionReader& stringReader, ByteBuffer& buffer, BinBuffer* binBuffer) const _THROW_(SparrowException); + + int compare(ByteBuffer& buffer1, PartitionReader& stringReader1, ByteBuffer& buffer2, PartitionReader& stringReader2, + BinBuffer* binBuffer) const _THROW_(SparrowException); +}; + +typedef SYSvector RecordWrappers; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SerialRecordWrapper +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SerialRecordWrapper : public RecordWrapper { +private: + + const uint32_t serial_; // Column alteration serial number + const uint32_t index_; + const bool tree_; + +public: + + SerialRecordWrapper(const uint32_t serial, const uint32_t index, const bool tree) : serial_(serial), index_(index), tree_(tree) { + } + + SerialRecordWrapper(const uint32_t serial, const uint32_t index, const bool tree, + TableFields& fields, const ColumnIds* columnIds) _THROW_(SparrowException) + : RecordWrapper(fields, columnIds, tree, true), serial_(serial), index_(index), tree_(tree) { + } + + ~SerialRecordWrapper() { + fields_.clearAndDestroy(); + } + + uint32_t hash() const { + uint32_t result = 31 + index_; + result = 31 * result + serial_; + result = 31 * result + (tree_ ? 
1231 : 1237); + return result; + } + + bool operator == (const SerialRecordWrapper& right) const { + return serial_ == right.serial_ && index_ == right.index_ && tree_ == right.tree_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// PartSerialRecordWrapper +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartSerialRecordWrapper : public RecordWrapper { +private: + + const uint32_t serial_; // Partition serial number + +public: + + PartSerialRecordWrapper(const uint32_t serial) : serial_(serial) { + } + + PartSerialRecordWrapper(const uint32_t serial, const TableFields& fields) _THROW_(SparrowException) + : RecordWrapper(fields, NULL, false), serial_(serial) { + } + + ~PartSerialRecordWrapper() { + fields_.clearAndDestroy(); + } + + uint32_t hash() const { + return serial_; + } + + bool operator == (const PartSerialRecordWrapper& right) const { + return serial_ == right.serial_; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// ColumnInfo +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class ColumnInfo { +private: + + uint32_t id_; + uint32_t bitOffset_; + uint32_t nbits_; + uint32_t offset_; + uint32_t size_; + +public: + + ColumnInfo() + : id_(0), bitOffset_(0), nbits_(0), offset_(0), size_(0) { + } + + ColumnInfo(const uint32_t id, const uint32_t bitOffset, const uint32_t nbits, const uint32_t offset, const uint32_t size) + : id_(id), bitOffset_(bitOffset), nbits_(nbits), offset_(offset), size_(size) { + } + + uint32_t getId() const { + return id_; + } + + uint32_t getBitOffset() const { + return bitOffset_; + } + + uint32_t getNBits() const { + return nbits_; + } + + uint32_t getOffset() const { + return offset_; + } + + uint32_t getSize() const { + return size_; + } +}; + +typedef SYSvector ColumnInfos; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// DataFileReader +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class DataFileReader { +protected: + + const TableFields& fields_; + ColumnInfos infos_; + const RecordWrapper recordWrapper_; + +public: + + DataFileReader(const TableFields& fields, const ColumnIds& columnIds, const ColumnIds& skippedColumns); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// BitMapGuard +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class BitmapGuard { +private: + + TABLE* table_; + my_bitmap_map* savedMap_; + +public: + + BitmapGuard(TABLE* table) : table_(table), savedMap_(tmp_use_all_columns(table, table->read_set)) { + } + + ~BitmapGuard() { + tmp_restore_column_map(table_->read_set, savedMap_); + } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowHandler +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TableShare; +class SparrowHandler : public handler { + friend class TableShare; + +private: + + TableShare* share_; + THR_LOCK_DATA lockData_; + + // Context. 
+ Context context_; + + static bool initialized_; + +private: + + static Column createColumn(Field& field) _THROW_(SparrowException); + + static Column createColumn(Create_field& field) _THROW_(SparrowException); + + static Column createColumn(const char* name, const enum_field_types fieldType, const uint decimals, const uint32_t fieldFlags, + const CHARSET_INFO* charset, const Str& defaultValue) _THROW_(SparrowException); + +public: + + // Static methods for handlerton. + + static int initialize(void* p); + + static int deinitialize(void* p); + + static int start_slave_threads(); + static int stop_slave_threads(); + + static handler* create(handlerton* hton, TABLE_SHARE* table, bool partitioned, MEM_ROOT* mem_root); + + static int closeConnection(handlerton* hton, THD* thd); + + static void dropDatabase(handlerton* hton, char* path); + + static int panic(handlerton* hton, enum ha_panic_function flag); + + static bool showStatus(handlerton* hton, THD* thd, stat_print_fn* stat_print, enum ha_stat_type stat_type); + + SparrowHandler(handlerton* hton, TABLE_SHARE* table); + + ~SparrowHandler(); + + // -------------------------------------------------------- + // Meta data routines to CREATE, DROP, RENAME table are often used at ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..). + + // Note: dd::Table* arguments are to be used only if we decide to support atomic DDL + + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info, dd::Table *table_def) override; + + int delete_table(const char *name, const dd::Table *table_def) override; + + int rename_table(const char *from, const char *to, const dd::Table *from_table_def, dd::Table *to_table_def) override; + + void update_create_info(HA_CREATE_INFO* create_info) override; + + // -------------------------------------------------------- + // Open and close handler object to ensure all underlying files and objects allocated and deallocated for query handling is handled properly. + int open(const char *name, int mode, uint test_if_locked, const dd::Table *table_def) override; + + int close() override; + + // -------------------------------------------------------- + // This module contains methods that are used to understand start/end of statements, transaction boundaries, and aid for proper concurrency control. + THR_LOCK_DATA** store_lock(THD* thd, THR_LOCK_DATA** to, enum thr_lock_type lockType) override; + + int external_lock(THD* thd, int lockType) override; + + // -------------------------------------------------------- + // This part of the handler interface is used to change the records after INSERT, DELETE, UPDATE, REPLACE method calls but also other + // special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE. + + int write_row(uchar* buf) override; + + int update_row(const uchar* old_data, uchar* new_data) override; + + int delete_row(const uchar* buf) override; + + int delete_all_rows() override; + + void start_bulk_insert(ha_rows rows) override; + + int end_bulk_insert() override; + + // -------------------------------------------------------- + // This module is used for the most basic access method for any table handler. This is to fetch all data through a full table scan. No indexes are needed to implement this part. 
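+ // Illustrative sequence of calls issued by the server during a full scan (simplified sketch
+ // only; 'h' stands for a handler pointer, 'buf' for the record buffer, error handling omitted):
+ //
+ //   h->rnd_init(true /* scan */);
+ //   while (h->rnd_next(buf) == 0) {
+ //     h->position(buf);   // remember the current row so it can be re-read later with rnd_pos()
+ //   }
+ //   h->rnd_end();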
+ + int rnd_init(bool scan) override; + + int rnd_next(uchar* buf) override; + + int rnd_pos(uchar* buf, uchar* pos) override; + + int rnd_end() override; + + void position(const uchar* record) override; + + // -------------------------------------------------------- + // This part of the handler interface is used to perform access through indexes. The interface is defined as a scan interface but the handler + // can also use key lookup if the index is a unique index or a primary key index. + + int index_init(uint idx, bool sorted) override; + + int index_next(uchar* buf) override; + + int index_prev(uchar* buf) override; + + int index_first(uchar* buf) override; + + int index_last(uchar* buf) override; + + int index_read_map(uchar* buf, const uchar* key, + key_part_map keyPartMap, enum ha_rkey_function findFlag) override; + + int index_read_idx_map(uchar* buf, uint index, const uchar* key, + key_part_map keyPartMap, enum ha_rkey_function findFlag) override; + + int index_read_last_map(uchar* buf, const uchar* key, key_part_map keyPartMap) override; + + int index_end() override; + + // -------------------------------------------------------- + // This calls are used to inform the handler of specifics of the ongoing scans and other actions. Most of these are used for optimisation purposes. + + // TBI. See mysql\include\my_base.h:734 and mysql\storage\example\ha_example.cc:515 + // Myisam may also be a good example. + int info(uint) override; + + // TBI. See mysql\include\my_base.h:184 and mysql\storage\example\ha_example.cc:2716 + int extra(enum ha_extra_function operation) override; + + int reset() override; + + // -------------------------------------------------------- + // Optimizer support + + double scan_time() override; + + double read_time(uint index, uint ranges, ha_rows rows) override; + + ha_rows records_in_range(uint inx, key_range* minKey, key_range* maxKey) override; + + // TBI. See mysql\sql\handler.h:5325 + ha_rows estimate_rows_upper_bound() override; + + int records(ha_rows *num_rows) override; + + // -------------------------------------------------------- + // This module contains various methods that returns text messages for table types, index type and error messages. + + const char* table_type() const override; + + bool get_error_message(int error, String* buf) override; + + + // -------------------------------------------------------- + // This module contains a number of methods defining limitations and characteristics of the handler (see also documentation regarding the + // individual flags). + + // See sql\handler.h:4309 and sql\handler.h:209 for list of flags + handler::Table_flags table_flags() const override; + + ulong index_flags(uint inx, uint part, bool all_parts) const override; + + uint max_supported_record_length() const override; + + uint max_supported_keys() const override; + + uint max_supported_key_parts() const override; + + uint max_supported_key_length() const override; + + uint max_supported_key_part_length(HA_CREATE_INFO *create_info) const override; + + enum ha_key_alg get_default_index_algorithm() const override; + + bool is_index_algorithm_supported(enum ha_key_alg key_alg) const override; + + // Has been replaced with methods get_default_index_algorithm(), is_index_algorithm_supported(). See mysql\sql\handler.h:4309 + // Default imlpementation is fine. + //const char* index_type(uint inx) override; + + // -------------------------------------------------------- + // This module is used to handle the support of auto increments. 
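+ // get_auto_increment() is expected to return, through first_value and nb_reserved_values, the
+ // first value and the size of a range of auto-increment values reserved for the statement;
+ // nb_desired_values is the server's hint of how many values the current statement will need.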
+
+ void get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values, ulonglong* first_value,
+ ulonglong* nb_reserved_values) override;
+
+ // Not sure what it does. Only implemented in the InnoDB storage engine.
+ void release_auto_increment() override { return; }
+
+ // Seems to have been removed
+ //int reset_auto_increment(ulonglong value) override;
+
+ // --------------------------------------------------------
+ // Methods for in-place ALTER TABLE support
+ // See mysql\sql\handler.h:6020
+
+ [[deprecated("Part of old, deprecated in-place ALTER API.")]]
+ bool check_if_incompatible_data(HA_CREATE_INFO* info, uint table_changes) override;
+
+ // Called by our check_if_incompatible_data() to analyze the raw alterations (Alter_inplace_info::Alter_info).
+ int check_alterations(Alter_info* info);
+
+ // TODO: Review implementation of these methods: old implementation in new method.
+ enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info) override;
+
+ // Note: dd::Table* arguments are to be used only if we decide to support atomic DDL.
+ bool prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, const dd::Table *old_table_def, dd::Table *new_table_def) override;
+
+ bool inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, const dd::Table *old_table_def, dd::Table *new_table_def) override;
+
+ bool commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit, const dd::Table *old_table_def, dd::Table *new_table_def) override;
+
+ void notify_table_changed(Alter_inplace_info *ha_alter_info) override;
+
+
+
+ // --------------------------------------------------------
+ // Administrative DDL (Data Definition Language)
+ // Methods that handle the structure of the data (tables, indexes)
+
+ int analyze(THD* thd, HA_CHECK_OPT* checkOpt) override;
+
+ int check(THD* thd, HA_CHECK_OPT* checkOpt) override;
+
+ int optimize(THD* thd, HA_CHECK_OPT* checkOpt) override;
+
+ int repair(THD* thd, HA_CHECK_OPT* checkOpt) override;
+
+ // Have those foreign key methods been replaced by something else?
+ //char* get_foreign_key_create_info() override;
+
+ //void free_foreign_key_create_info(char* str) override;
+
+ //int get_foreign_key_list(THD* thd, List<FOREIGN_KEY_INFO>* f_key_list) override;
+
+ // Not part of the handler interface anymore, but still used internally by inplace_alter_table()
+ int add_index(TABLE* altered_table, KEY* key_info_buffer, uint* indexes, uint nb);
+
+ // Not part of the handler interface anymore, but still used internally by inplace_alter_table()
+ int drop_index(TABLE* table, KEY** key_info, uint nb);
+
+ [[deprecated("Not implemented in 5.6.36: Admin commands not supported currently (almost purely MyISAM routines). This means that the following methods are not implemented. See sql/ha_partition.h")]]
+ //int backup(THD* thd, HA_CHECK_OPT* checkOpt) override;
+
+ [[deprecated("Not implemented in 5.6.36: Admin commands not supported currently (almost purely MyISAM routines). This means that the following methods are not implemented.
See sql/ha_partition.h")]] + //int restore(THD* thd, HA_CHECK_OPT* checkOpt) override; + + + // Overrides handler::clone() but the latter is not declared virtual + handler* clone(const char* name, MEM_ROOT* mem_root) override; + + void column_bitmaps_signal() override; + + void unlock_row() override; + + bool is_crashed() const override; + + bool auto_repair() const override; + + uint getActiveIndex() { + return active_index; + } + + void setActiveIndex(const uint idx) { + active_index= idx; + } + + TABLE& getTable() { + return *table; + } + + const TABLE& getTable() const { + return *table; + } + + ha_statistics& getStats() { + return stats; + } + + // Static methods for information schema + + static int initializeISTables(void* p); + + static int fillISTables(THD* thd, Table_ref* tables, Item* cond); + + static int deinitializeISTables(void* p); + + static int initializeISColumns(void* p); + + static int fillISColumns(THD* thd, Table_ref* tables, Item* cond); + + static int deinitializeISColumns(void* p); + + static int initializeISIndexes(void* p); + + static int fillISIndexes(THD* thd, Table_ref* tables, Item* cond); + + static int deinitializeISIndexes(void* p); + + static int initializeISAlterations(void* p); + + static int fillISAlterations(THD* thd, Table_ref* tables, Item* cond); + + static int deinitializeISAlterations(void* p); + + static int initializeISPartitions(void* p); + + static int fillISPartitions(THD* thd, Table_ref* tables, Item* cond); + + static int deinitializeISPartitions(void* p); + + // Misc + static uint alterTableFlags(uint flags); + + +private: + + static Str stripComments(const char* sql); + + static ForeignKeys getForeignKeys(const char* sql, const Str& databaseName, + const Str& tableName, TABLE* table); + + static const char* moveTo(const char* s, const char* keyword); + + static const char* getIdentifier(const char* s, Str& identifier, bool& hasDot); + + static Str print_KEY(const KEY& index); +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// TableShare +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TableShare : public RefCounted { +private: + + static SYSpHash hash_; + static Lock lock_; + + THR_LOCK tableLock_; + const Str databaseName_; + const Str tableName_; + const bool key_; + + // Master file. + MasterGuard master_; + + // Current column alteration serial. + uint32_t columnAlterSerial_; + + // Table fields. + TableFields mappedFields_; // Fields for columns as currently seen by MySQL (not including columns that have been deleted) + TableFields fields_; // Fields for all columns, including deleted columns + + // Prepared record readers. 
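+ // Slot 0 holds the reader for the data file; for index i, slots 1 + 2*i and 2 + 2*i hold the
+ // two readers built for that index (tree flag false and true, respectively), as indexed by
+ // getRecordWrapper() below.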
+ RecordWrappers recordWrappers_; + +public: + + TableShare(const Str& databaseName, const Str& tableName, TABLE* table) _THROW_(SparrowException); + + ~TableShare(); + + const Str& getDatabaseName() const { + return databaseName_; + } + + const Str& getTableName() const { + return tableName_; + } + + Master& getMaster() { + return *master_; + } + + uint32_t getColumnAlterSerial() const { + return columnAlterSerial_; + } + + const Master& getMaster() const { + return *master_; + } + + const TableFields& getMappedFields() const { + return mappedFields_; + } + + const TableFields& getFields() const { + return fields_; + } + + SerialRecordWrapper* createSerialRecordWrapper(TABLE& table, const uint32_t serial, const uint32_t index, const bool tree) const; + + PartSerialRecordWrapper* createPartSerialRecordWrapper(TABLE& table, const uint32_t alterSerial, const uint32_t partSerial, const ColumnIds& skippedColumnIds) const; + + const RecordWrapper& getRecordWrapper(const uint32_t index, const bool tree) const { + return recordWrappers_[index == DATA_FILE ? 0 : 1 + index * 2 + (tree ? 1 : 0)]; + } + + bool operator == (const TableShare& right) const { + return databaseName_ == right.databaseName_ && tableName_ == right.tableName_; + } + + static TableShare* acquire(const Str& databaseName, const Str& tableName, TABLE* table, THR_LOCK_DATA* lockData) _THROW_(SparrowException); + + static void release(TableShare* share); + + uint32_t hash() const { + uint32_t result = 1; + result = 31 + databaseName_.hash(); + result = 31 * result + tableName_.hash(); + return result; + } +}; + +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#endif /* #ifndef _handler_handler_h_ */ diff --git a/storage/sparrow/handler/plugin.cc b/storage/sparrow/handler/plugin.cc new file mode 100644 index 000000000000..5b240c843d0b --- /dev/null +++ b/storage/sparrow/handler/plugin.cc @@ -0,0 +1,691 @@ +/* + Sparrow plugin. 
+*/ + +#include "include/mysql/plugin.h" +#include "plugin.h" +#include "hasparrow.h" +#include "../engine/coalescing.h" + +using namespace Sparrow; + +using std::snprintf; + +#ifdef _WIN32 +#define MYSQL_SYSVAR_UINT64 MYSQL_SYSVAR_ULONGLONG +#else +#define MYSQL_SYSVAR_UINT64 MYSQL_SYSVAR_ULONG +#endif + +static SparrowStatus sparrowStatus; + +static int show_coalescing_queue_size(MYSQL_THD thd, struct SHOW_VAR *var, char *buf) +{ + var->type= SHOW_CHAR; + var->value= buf; // it's of SHOW_VAR_FUNC_BUFF_SIZE bytes + snprintf(buf, SHOW_VAR_FUNC_BUFF_SIZE, "%u", CoalescingWorker::getQueue().getSize()); + return 0; +} + +static int show_disk_total(MYSQL_THD thd, struct SHOW_VAR *var, char *buf) +{ + var->type= SHOW_CHAR; + var->value= buf; // it's of SHOW_VAR_FUNC_BUFF_SIZE bytes + uint64_t totalFree = 0; + uint64_t totalUsed = 0; + uint64_t totalSize = 0; + FileUtil::getDiskStats(totalFree, totalUsed, totalSize); + snprintf(buf, SHOW_VAR_FUNC_BUFF_SIZE, "%llu", static_cast(totalSize)); + return 0; +} + +static int show_disk_used(MYSQL_THD thd, struct SHOW_VAR *var, char *buf) +{ + var->type= SHOW_CHAR; + var->value= buf; // it's of SHOW_VAR_FUNC_BUFF_SIZE bytes + uint64_t totalFree = 0; + uint64_t totalUsed = 0; + uint64_t totalSize = 0; + FileUtil::getDiskStats(totalFree, totalUsed, totalSize); + snprintf(buf, SHOW_VAR_FUNC_BUFF_SIZE, "%llu", static_cast(totalUsed)); + return 0; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +static SHOW_VAR sparrow_status_variables[]= { + { "api_active_connections", (char*) &sparrowStatus.apiActiveConnections_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "api_connections", (char*) &sparrowStatus.apiConnections_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_requests", (char*) &sparrowStatus.apiRequests_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_input_bytes", (char*) &sparrowStatus.apiInputBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_input_uncompressed_bytes", (char*) &sparrowStatus.apiInputUncompressedBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_responses", (char*) &sparrowStatus.apiResponses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_output_bytes", (char*) &sparrowStatus.apiOutputBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "api_output_uncompressed_bytes", (char*) &sparrowStatus.apiOutputUncompressedBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_read_bytes", (char*) &sparrowStatus.ioReadBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_written_bytes", (char*) &sparrowStatus.ioWrittenBytes_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_reads", (char*) &sparrowStatus.ioReads_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_writes", (char*) &sparrowStatus.ioWrites_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_opens", (char*) &sparrowStatus.ioOpens_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_closes", (char*) &sparrowStatus.ioCloses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_buffers", (char*) &sparrowStatus.ioBuffers_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "io_buffer_size", (char*) &sparrowStatus.ioBufferSize_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_nb_small", (char*) &sparrowStatus.ioNbSmall_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_nb_medium", (char*) &sparrowStatus.ioNbMedium_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "io_nb_large", (char*) &sparrowStatus.ioNbLarge_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "tuple_buffer_size", (char*) &sparrowStatus.tupleBufferSize_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "total_size", (char*) &sparrowStatus.totalSize_, SHOW_LONGLONG, 
SHOW_SCOPE_GLOBAL }, + { "free_disk_space", (char*) &sparrowStatus.freeDiskSpace_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "file_cache_acquires", (char*) &sparrowStatus.fileCacheAcquires_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "file_cache_releases", (char*) &sparrowStatus.fileCacheReleases_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "file_cache_misses", (char*) &sparrowStatus.fileCacheMisses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "file_cache_hits", (char*) &sparrowStatus.fileCacheHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "file_cache_slow_hits", (char*) &sparrowStatus.fileCacheSlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_acquires", (char*) &sparrowStatus.blockCacheAcquires_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_releases", (char*) &sparrowStatus.blockCacheReleases_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_misses", (char*) &sparrowStatus.blockCacheMisses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_hits", (char*) &sparrowStatus.blockCacheHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_slow_hits", (char*) &sparrowStatus.blockCacheSlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level0_hits", (char*) &sparrowStatus.blockCacheLvl0Hits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level1_hits", (char*) &sparrowStatus.blockCacheLvl1Hits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level2_hits", (char*) &sparrowStatus.blockCacheLvl2Hits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level3_hits", (char*) &sparrowStatus.blockCacheLvl3Hits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level0_slow_hits", (char*) &sparrowStatus.blockCacheLvl0SlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level1_slow_hits", (char*) &sparrowStatus.blockCacheLvl1SlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level2_slow_hits", (char*) &sparrowStatus.blockCacheLvl2SlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level3_slow_hits", (char*) &sparrowStatus.blockCacheLvl3SlowHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level0_misses", (char*) &sparrowStatus.blockCacheLvl0Misses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level1_misses", (char*) &sparrowStatus.blockCacheLvl1Misses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level2_misses", (char*) &sparrowStatus.blockCacheLvl2Misses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level3_misses", (char*) &sparrowStatus.blockCacheLvl3Misses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "block_cache_level0_fill_ratio", (char*) &sparrowStatus.blockCacheLvl0FillRatio_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "block_cache_level1_fill_ratio", (char*) &sparrowStatus.blockCacheLvl1FillRatio_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "block_cache_level2_fill_ratio", (char*) &sparrowStatus.blockCacheLvl2FillRatio_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "block_cache_level3_fill_ratio", (char*) &sparrowStatus.blockCacheLvl3FillRatio_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "dns_caches", (char*) &sparrowStatus.dnsCaches_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "dns_cache_acquires", (char*) &sparrowStatus.dnsCacheAcquires_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_cache_hits", (char*) &sparrowStatus.dnsCacheHits_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_cache_evictions", (char*) &sparrowStatus.dnsCacheEvictions_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_cache_entries", (char*) &sparrowStatus.dnsCacheEntries_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_cache_pending_entries", (char*) 
&sparrowStatus.dnsCachePendingEntries_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_cache_size", (char*) &sparrowStatus.dnsCacheSize_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_requests", (char*) &sparrowStatus.dnsRequests_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_retries", (char*) &sparrowStatus.dnsRetries_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_responses", (char*) &sparrowStatus.dnsResponses_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_discarded_responses_1", (char*) &sparrowStatus.dnsDiscardedResponses1_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_discarded_responses_2", (char*) &sparrowStatus.dnsDiscardedResponses2_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_discarded_responses_3", (char*) &sparrowStatus.dnsDiscardedResponses3_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_discarded_responses_4", (char*) &sparrowStatus.dnsDiscardedResponses4_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_no_answer", (char*) &sparrowStatus.dnsNoAnswer_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_decoding", (char*) &sparrowStatus.dnsErrorsDecoding_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_unknown", (char*) &sparrowStatus.dnsErrorsUnknown_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_format", (char*) &sparrowStatus.dnsErrorsFormat_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_failure", (char*) &sparrowStatus.dnsErrorsFailure_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_name", (char*) &sparrowStatus.dnsErrorsName_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_not_implemented", (char*) &sparrowStatus.dnsErrorsNotImplemented_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_refused", (char*) &sparrowStatus.dnsErrorsRefused_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_yx_domain", (char*) &sparrowStatus.dnsErrorsYXDomain_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_yx_rr_set", (char*) &sparrowStatus.dnsErrorsYXRRSet_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_nx_rr_set", (char*) &sparrowStatus.dnsErrorsNXRRSet_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_not_auth", (char*) &sparrowStatus.dnsErrorsNotAuth_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "dns_errors_not_zone", (char*) &sparrowStatus.dnsErrorsNotZone_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "ddl_serial", (char*) &sparrowStatus.ddlSerial_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "worker_threads", (char*) &sparrowStatus.workerThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "flush_threads", (char*) &sparrowStatus.flushThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "dns_worker_threads", (char*) &sparrowStatus.dnsWorkerThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "writer_threads", (char*) &sparrowStatus.writerThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "api_worker_threads", (char*) &sparrowStatus.apiWorkerThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "alter_threads", (char*) &sparrowStatus.alterThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "coalescing_threads", (char*) &sparrowStatus.coalescingThreads_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "disk_total", (char*) &show_disk_total, SHOW_FUNC, SHOW_SCOPE_GLOBAL }, + { "disk_used", (char*) &show_disk_used, SHOW_FUNC, SHOW_SCOPE_GLOBAL }, + { "coalescing_maintask_processed", (char*) &sparrowStatus.coalescingMainTaskProcessed_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "coalescing_indextask_processed", (char*) &sparrowStatus.coalescingIndexTaskProcessed_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "coalescing_queue_size", (char*) &show_coalescing_queue_size, SHOW_FUNC, SHOW_SCOPE_GLOBAL }, + { "flush_waits", (char*) 
&sparrowStatus.flushWait_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "flush_forced", (char*) &sparrowStatus.flushForced_, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_tasks", (char*) &sparrowStatus.tasksPendingFlushTasks_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_alltasks", (char*) &sparrowStatus.tasksPendingFlushAllTasks_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_jobs", (char*) &sparrowStatus.tasksPendingFlushJobs_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_string_jobs", (char*) &sparrowStatus.tasksPendingFlushJobs_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_index_jobs", (char*) &sparrowStatus.tasksPendingIndexJobs_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_flush_write_jobs", (char*) &sparrowStatus.tasksPendingWriteJobs_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_coalescing_maintasks", (char*) &sparrowStatus.tasksPendingCoalescingMainTasks_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_coalescing_indextasks", (char*) &sparrowStatus.tasksPendingCoalescingIndexTasks_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { "tasks_pending_dns_tasks", (char*) &sparrowStatus.tasksPendingDnsTasks_, SHOW_INT, SHOW_SCOPE_GLOBAL }, + { NullS, NullS, SHOW_INT, SHOW_SCOPE_GLOBAL } +}; + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +static int show_sparrow_vars(THD *thd, SHOW_VAR *var, char *buff) { + sparrowStatus = SparrowStatus::get(); + var->type= SHOW_ARRAY; + var->value= (char *) &sparrow_status_variables; + var->scope = SHOW_SCOPE_GLOBAL; + return 0; +} + +static SHOW_VAR sparrow_status_variables_export[]= { + { "sparrow", (char*) &show_sparrow_vars, SHOW_FUNC, SHOW_SCOPE_GLOBAL }, + { NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL } +}; + +uint sparrow_open_files; +uint sparrow_cache_block_size, sparrow_small_read_block_size, sparrow_medium_read_block_size, sparrow_large_read_block_size; +uint sparrow_write_block_size, sparrow_transfer_block_size; +int sparrow_socket_sndbuf_size, sparrow_socket_rcvbuf_size; +uint sparrow_idle_thread_timeout; +uint sparrow_default_max_lifetime; +uint sparrow_index_cost_percentage; +uint64_t sparrow_max_disk_size; +bool sparrow_async_io; +bool sparrow_coalescing, sparrow_purge_constantly; +uint64_t sparrow_purge_security_margin; +uint sparrow_max_flush_threads, sparrow_max_worker_threads, sparrow_max_writer_threads, sparrow_max_dns_worker_threads, sparrow_max_alter_threads, sparrow_max_coalescing_threads, sparrow_max_api_worker_threads; +uint sparrow_flush_interval; +uint64_t sparrow_direct_insertion_threshold; +uint64_t sparrow_max_tuple_buffer_size; +uint64_t sparrow_default_string_optimization_size; +uint sparrow_tuple_buffer_threshold; +uint sparrow_listener_port; +char* sparrow_listener_address; +uint sparrow_max_connections; +ulong sparrow_incompatible_table; +uint64_t sparrow_cache0_size, sparrow_cache1_size, sparrow_cache2_size, sparrow_cache3_size; +uint sparrow_disk_sector_size; +uint64_t sparrow_max_dns_cache_size; +uint sparrow_dns_timeout; +uint sparrow_dns_retries; +char* sparrow_filesystems; +char* sparrow_coalescing_filesystems; +bool sparrow_column_optimisation; +uint sparrow_column_optimisation_lvl; +bool sparrow_quick_shutdown; +bool sparrow_auto_partition_repair; +bool sparrow_disable_purge; +bool sparrow_log_purge_activity; + +const uint32_t SPARROW_VERSION = 0x0100; /* 1.0 */ + +static MYSQL_SYSVAR_UINT(open_files, + sparrow_open_files, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Maximum number of files opened simultaneously in Sparrow.", + 0, 0, 300, 10, 
UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(default_max_lifetime, + sparrow_default_max_lifetime, + PLUGIN_VAR_RQCMDARG, + "Default maximum lifetime of data in a table, in days. When this lifetime is reached, older data are purged.", + 0, 0, 365, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(index_cost_percentage, + sparrow_index_cost_percentage, + PLUGIN_VAR_RQCMDARG, + "Percentage applied to the number of records in range, in order to favor index scans over table scans. Default is 50%: twice less records in range than computed.", + 0, 0, 50, 0, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT64(max_disk_size, + sparrow_max_disk_size, + PLUGIN_VAR_RQCMDARG, + "Maximum disk size used by all Sparrow tables. If 0, all disk space except a security margin of 1 GB per file system may be used.", + 0, 0, 0, 0, ULONG_MAX, 0); + +static MYSQL_SYSVAR_UINT(small_read_block_size, + sparrow_small_read_block_size, + PLUGIN_VAR_RQCMDARG, + "Block size used when reading e.g. data files during an index scan.", + 0, 0, 512*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_UINT(medium_read_block_size, + sparrow_medium_read_block_size, + PLUGIN_VAR_RQCMDARG, + "Block size used when reading e.g. master files.", + 0, 0, 1024*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_UINT(large_read_block_size, + sparrow_large_read_block_size, + PLUGIN_VAR_RQCMDARG, + "Block size used when reading e.g. index records during an index scan or data records during a full scan.", + 0, 0, 2048*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_UINT(write_block_size, + sparrow_write_block_size, + PLUGIN_VAR_RQCMDARG, + "Block size used when writing to files.", + 0, 0, 64*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_UINT(transfer_block_size, + sparrow_transfer_block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Block size used when reading data from the network.", + 0, 0, 1024*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_INT(socket_sndbuf_size, + sparrow_socket_sndbuf_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of the socket buffer used to send data.", + 0, 0, 2048, 512, INT_MAX, 0); + +static MYSQL_SYSVAR_INT(socket_rcvbuf_size, + sparrow_socket_rcvbuf_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of the socket buffer used to receive data.", + 0, 0, 1024*1024, 512, INT_MAX, 0); + +static MYSQL_SYSVAR_UINT(cache_block_size, + sparrow_cache_block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Block size used when caching data.", + 0, 0, 4*1024, 512, UINT_MAX, 512); + +static MYSQL_SYSVAR_UINT64(cache0_size, + sparrow_cache0_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of level 0 cache. This cache level contains data read recently but not yet used.", + 0, 0, 16*1024*1024, 1024*1024, ULONG_MAX, 1024*1024); + +static MYSQL_SYSVAR_UINT64(cache1_size, + sparrow_cache1_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of level 1 cache. This cache level contains meta data and index records from recent queries.", + 0, 0, 64*1024*1024, 1024*1024, ULONG_MAX, 1024*1024); + +static MYSQL_SYSVAR_UINT64(cache2_size, + sparrow_cache2_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of level 2 cache. This cache level contains data records from recent queries.", + 0, 0, 64*1024*1024, 1024*1024, ULONG_MAX, 1024*1024); + +static MYSQL_SYSVAR_UINT64(cache3_size, + sparrow_cache3_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of level 3 cache. 
This cache level contains data records from recent inserts.", + 0, 0, 64*1024*1024, 1024*1024, ULONG_MAX, 1024*1024); + +static MYSQL_SYSVAR_UINT(idle_thread_timeout, + sparrow_idle_thread_timeout, + PLUGIN_VAR_OPCMDARG, + "Idle thread timeout, in milliseconds. When an idle thread times out, it is destroyed.", + 0, 0, 300000, 1000, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_flush_threads, + sparrow_max_flush_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of flush threads.", + 0, 0, 4, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_worker_threads, + sparrow_max_worker_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of worker threads.", + 0, 0, 4, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_writer_threads, + sparrow_max_writer_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of writer threads.", + 0, 0, 1, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_dns_worker_threads, + sparrow_max_dns_worker_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of worker threads for DNS resolution.", + 0, 0, 2, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_alter_threads, + sparrow_max_alter_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of threads used for online table modifications.", + 0, 0, 1, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_BOOL(async_io, + sparrow_async_io, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "If true, use asynchronous I/O. This is enabled by default.", + 0, 0, true); + +static MYSQL_SYSVAR_BOOL(coalescing, + sparrow_coalescing, + PLUGIN_VAR_RQCMDARG, + "If false, partition coalescing is disabled. It is enabled by default.", + 0, 0, true); + +static MYSQL_SYSVAR_BOOL(purge_constantly, + sparrow_purge_constantly, + PLUGIN_VAR_RQCMDARG, + "If true, the purge process compares the partition timestamps to the current time instead of the most recent inserted timestamp. Therefore, if the client application stops inserting data into a table, this data from this table will continue to be purge regularly. It is disabled by default.", + 0, 0, false); + +static MYSQL_SYSVAR_UINT64(purge_security_margin, + sparrow_purge_security_margin, + PLUGIN_VAR_RQCMDARG, + "Sets the amount of free space to leave on each file system.", + 0, 0, 1024ULL*1024*1024, 1024, ULONG_MAX, 1024); + +static MYSQL_SYSVAR_UINT(max_coalescing_threads, + sparrow_max_coalescing_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of threads used for partition coalescing.", + 0, 0, 1, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_api_worker_threads, + sparrow_max_api_worker_threads, + PLUGIN_VAR_RQCMDARG, + "Maximum number of API worker threads.", + 0, 0, 10, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT64(direct_insertion_threshold, + sparrow_direct_insertion_threshold, + PLUGIN_VAR_RQCMDARG, + "Insertion size threshold : If the inserted data are above this threshold, the data is directly inserted to the engine without using the context buffer", + 0, 0, 512*1024, 1024, ULONG_MAX, 1024); + +static MYSQL_SYSVAR_UINT64(max_tuple_buffer_size, + sparrow_max_tuple_buffer_size, + PLUGIN_VAR_RQCMDARG, + "Maximum size of the tuple buffer. When this buffer is full, insertions are blocked until a flush occurs.", + 0, 0, 64*1024*1024, 1024*1024, ULONG_MAX, 1024*1024); + +static MYSQL_SYSVAR_UINT64(default_string_optimization_size, + sparrow_default_string_optimization_size, + PLUGIN_VAR_RQCMDARG, + "Default amount of data read from the string file when optimizing transient strings. 
Strings already present in the file will not be flushed.", + 0, 0, 16*1024*1024, 0, ULONG_MAX, 0); + +static MYSQL_SYSVAR_UINT(tuple_buffer_threshold, + sparrow_tuple_buffer_threshold, + PLUGIN_VAR_RQCMDARG, + "Threshold, in percentage of sparrow_max_tuple_buffer_size, above which data are flushed to disk.", + 0, 0, 50, 1, 100, 0); + +#ifdef NDEBUG +#define MIN_SPARROW_FLUSH_INTERVAL 5 +#else +#define MIN_SPARROW_FLUSH_INTERVAL 0 +#endif +static MYSQL_SYSVAR_UINT(flush_interval, + sparrow_flush_interval, + PLUGIN_VAR_RQCMDARG, + "Interval, in seconds, between data flushes. Data may be written more often if the maximum tuple buffer size is reached before this time interval elapses.", + 0, 0, 300, MIN_SPARROW_FLUSH_INTERVAL, 3600, 0); + +static MYSQL_SYSVAR_UINT(listener_port, + sparrow_listener_port, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "TCP port on which Sparrow binds its listener socket.", + 0, 0, 11000, 1, 65535, 0); + +static MYSQL_SYSVAR_STR(listener_address, + sparrow_listener_address, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Local address on which Sparrow binds its listener socket.", + 0, 0, 0); + +static MYSQL_SYSVAR_UINT(max_connections, + sparrow_max_connections, + PLUGIN_VAR_RQCMDARG, + "Maximum number of simultaneous connections on Sparrow.", + 0, 0, 10, 1, UINT_MAX, 0); + +const char* sparrowIncompatibleTableNames[] = { "drop", "rename", NullS }; + +TYPELIB sparrowIncompatibleTableTypelib = { + array_elements(sparrowIncompatibleTableNames) - 1, + "sparrowIncompatibleTableTypelib", + sparrowIncompatibleTableNames, + 0 +}; + +static MYSQL_SYSVAR_ENUM(incompatible_table, + sparrow_incompatible_table, + PLUGIN_VAR_RQCMDARG, + "Action taken when an incompatible Sparrow table is detected.", + 0, + 0, + 0, + &sparrowIncompatibleTableTypelib); + +static MYSQL_SYSVAR_UINT(disk_sector_size, + sparrow_disk_sector_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Unix only: disk block size of the partition where the database resides.", + 0, 0, 512, 1, UINT_MAX, 0); + +static MYSQL_SYSVAR_UINT64(max_dns_cache_size, + sparrow_max_dns_cache_size, + PLUGIN_VAR_RQCMDARG, + "Maximum size of the DNS cache.", + 0, 0, 64*1024*1024, 0, ULONG_MAX, 1024); + +static MYSQL_SYSVAR_UINT(dns_timeout, + sparrow_dns_timeout, + PLUGIN_VAR_RQCMDARG, + "Default DNS query timeout, in milliseconds.", + 0, 0, 200, 1, 1000, 0); + +static MYSQL_SYSVAR_UINT(dns_retries, + sparrow_dns_retries, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Default maximum number of retries when a query times out. If multiple servers are available for the query, the retry is performed on the next server.", + 0, 0, 2, 0, 10, 0); + +static MYSQL_SYSVAR_STR(filesystems, + sparrow_filesystems, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Comma separated list of additional file systems where Sparrow will store its data and index files. Caution: removing a file system from this list requires a database re-initialization.", + 0, 0, 0); + +static MYSQL_SYSVAR_STR(coalescing_filesystems, + sparrow_coalescing_filesystems, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Comma separated list of additional file systems where Sparrow will store index files before coalescing. To be efficient, this file system must be backed by fast HDDs or SSDs. Caution: removing a file system from this list requires a database re-initialization.", + 0, 0, 0); + +static MYSQL_SYSVAR_BOOL(column_optimisation, + sparrow_column_optimisation, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "If true, use nullptr column optimization. 
This is enabled by default.", + 0, 0, true); + +static MYSQL_SYSVAR_UINT(column_optimisation_lvl, + sparrow_column_optimisation_lvl, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Sparrow automatically removes from partitions the columns that contain only nullptr values. However, partitions with different sets of nullptr columns cannot be coalesced together, which can increase the number of partitions. To limit that increase, change this parameter. If set to 0, all nullptr columns are optimized, but this may generate many partitions. If set to a high value (for example 10), fewer nullptr columns are optimized, but far fewer partitions are generated. The default value is 2.", + 0, 0, 2, 0, 10, 0); + +static MYSQL_SYSVAR_BOOL(quick_shutdown, + sparrow_quick_shutdown, + PLUGIN_VAR_RQCMDARG, + "If true, data in memory are not written to disk upon shutdown to save time. Setting it to false increases shutdown time significantly and, if the process is killed during that time, files may get corrupted. This is enabled by default.", + 0, 0, true); + +static MYSQL_SYSVAR_BOOL(auto_partition_repair, + sparrow_auto_partition_repair, + PLUGIN_VAR_RQCMDARG, + "If true, Sparrow will automatically try to repair corrupted partitions, and delete them if the repair fails. This is enabled by default.", + 0, 0, true); + +static MYSQL_SYSVAR_BOOL(disable_purge, + sparrow_disable_purge, + PLUGIN_VAR_RQCMDARG, + "If true, automatic partition purging is disabled. This is for debugging purposes and should only be used by the dev team. This is false by default.", + 0, 0, false); + +static MYSQL_SYSVAR_BOOL(log_purge_activity, + sparrow_log_purge_activity, + PLUGIN_VAR_RQCMDARG, + "If true, a message is logged when low disk space forces the purge module to delete old data partitions that have not yet reached their maximum lifetime in order to make room for newly inserted data. 
This is true by default.", + 0, 0, true); + + +static struct SYS_VAR* sparrow_system_variables[] = { + MYSQL_SYSVAR(open_files), + MYSQL_SYSVAR(default_max_lifetime), + MYSQL_SYSVAR(index_cost_percentage), + MYSQL_SYSVAR(max_disk_size), + MYSQL_SYSVAR(small_read_block_size), + MYSQL_SYSVAR(medium_read_block_size), + MYSQL_SYSVAR(large_read_block_size), + MYSQL_SYSVAR(write_block_size), + MYSQL_SYSVAR(transfer_block_size), + MYSQL_SYSVAR(socket_sndbuf_size), + MYSQL_SYSVAR(socket_rcvbuf_size), + MYSQL_SYSVAR(cache_block_size), + MYSQL_SYSVAR(cache0_size), + MYSQL_SYSVAR(cache1_size), + MYSQL_SYSVAR(cache2_size), + MYSQL_SYSVAR(cache3_size), + MYSQL_SYSVAR(idle_thread_timeout), + MYSQL_SYSVAR(max_flush_threads), + MYSQL_SYSVAR(max_worker_threads), + MYSQL_SYSVAR(max_api_worker_threads), + MYSQL_SYSVAR(max_writer_threads), + MYSQL_SYSVAR(max_dns_worker_threads), + MYSQL_SYSVAR(max_alter_threads), + MYSQL_SYSVAR(async_io), + MYSQL_SYSVAR(coalescing), + MYSQL_SYSVAR(purge_constantly), + MYSQL_SYSVAR(purge_security_margin), + MYSQL_SYSVAR(max_coalescing_threads), + MYSQL_SYSVAR(max_tuple_buffer_size), + MYSQL_SYSVAR(direct_insertion_threshold), + MYSQL_SYSVAR(default_string_optimization_size), + MYSQL_SYSVAR(tuple_buffer_threshold), + MYSQL_SYSVAR(flush_interval), + MYSQL_SYSVAR(listener_port), + MYSQL_SYSVAR(listener_address), + MYSQL_SYSVAR(max_connections), + MYSQL_SYSVAR(incompatible_table), + MYSQL_SYSVAR(disk_sector_size), + MYSQL_SYSVAR(max_dns_cache_size), + MYSQL_SYSVAR(dns_timeout), + MYSQL_SYSVAR(dns_retries), + MYSQL_SYSVAR(filesystems), + MYSQL_SYSVAR(coalescing_filesystems), + MYSQL_SYSVAR(column_optimisation), + MYSQL_SYSVAR(column_optimisation_lvl), + MYSQL_SYSVAR(quick_shutdown), + MYSQL_SYSVAR(auto_partition_repair), + MYSQL_SYSVAR(disable_purge), + MYSQL_SYSVAR(log_purge_activity), + 0 +}; + +// Information schema plugins for Sparrow. 
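For reference, in the stock MYSQL_SYSVAR_* macros from <mysql/plugin.h> used by the definitions above, the positional arguments that follow the option flags and the help string are the check callback, the update callback, the default value and, for the numeric variants, the minimum, the maximum and the block size (rounding step). A minimal annotated sketch with a hypothetical variable that is not part of this patch:

    #include <climits>         // UINT_MAX
    #include <mysql/plugin.h>  // MYSQL_SYSVAR_* macros and PLUGIN_VAR_* flags

    static unsigned int sparrow_example_threads;   // backing C variable (hypothetical)

    static MYSQL_SYSVAR_UINT(example_threads,      // exposed to SQL as "sparrow_example_threads"
      sparrow_example_threads,                     // storage for the value
      PLUGIN_VAR_RQCMDARG,                         // option flags
      "Maximum number of example threads.",        // help text shown by SHOW VARIABLES
      nullptr,                                     // check callback (none)
      nullptr,                                     // update callback (none)
      4,                                           // default
      1,                                           // minimum
      UINT_MAX,                                    // maximum
      0);                                          // block size (0 = no rounding)

Such a variable only becomes visible to the server once it is also listed in the sparrow_system_variables array above, which is passed to the plugin descriptor below.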
+struct st_mysql_information_schema sparrow_is_info = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; +struct st_mysql_plugin sparrow_tables = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &sparrow_is_info, + SPARROW_IS_TABLES_NAME, + SPARROW_AUTH, + SPARROW_IS_TABLES_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initializeISTables, + nullptr, + SparrowHandler::deinitializeISTables, + SPARROW_VERSION, + nullptr, + nullptr, + nullptr, + 0 +}; + +struct st_mysql_plugin sparrow_columns = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &sparrow_is_info, + SPARROW_IS_COLUMNS_NAME, + SPARROW_AUTH, + SPARROW_IS_COLUMNS_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initializeISColumns, + nullptr, + SparrowHandler::deinitializeISColumns, + SPARROW_VERSION, + nullptr, + nullptr, + nullptr, + 0 +}; + +struct st_mysql_plugin sparrow_indexes = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &sparrow_is_info, + SPARROW_IS_INDEXES_NAME, + SPARROW_AUTH, + SPARROW_IS_INDEXES_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initializeISIndexes, + nullptr, + SparrowHandler::deinitializeISIndexes, + SPARROW_VERSION, + nullptr, + nullptr, + nullptr, + 0 +}; + +struct st_mysql_plugin sparrow_alterations = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &sparrow_is_info, + SPARROW_IS_ALTERATIONS_NAME, + SPARROW_AUTH, + SPARROW_IS_ALTERATIONS_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initializeISAlterations, + nullptr, + SparrowHandler::deinitializeISAlterations, + SPARROW_VERSION, + nullptr, + nullptr, + nullptr, + 0 +}; + +struct st_mysql_plugin sparrow_partitions = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &sparrow_is_info, + SPARROW_IS_PARTITIONS_NAME, + SPARROW_AUTH, + SPARROW_IS_PARTITIONS_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initializeISPartitions, + nullptr, + SparrowHandler::deinitializeISPartitions, + SPARROW_VERSION, + nullptr, + nullptr, + nullptr, + 0 +}; + +// The Sparrow storage engine plugin. +struct st_mysql_storage_engine sparrow_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; +mysql_declare_plugin(sparrow) { + MYSQL_STORAGE_ENGINE_PLUGIN, + &sparrow_storage_engine, + SPARROW_ENGINE_NAME, + SPARROW_AUTH, + SPARROW_ENGINE_DESC, + PLUGIN_LICENSE_PROPRIETARY, + SparrowHandler::initialize, /* Plugin init. */ + nullptr, /* Plugin uninstall. Not necessary. */ + SparrowHandler::deinitialize, /* Plugin deinit. */ + SPARROW_VERSION, + sparrow_status_variables_export, /* Status variables. */ + sparrow_system_variables, /* System variables. */ + nullptr, /* Config options, reserved for future dependency checking. */ + 0 +}, +sparrow_tables, +sparrow_columns, +sparrow_indexes, +sparrow_alterations, +sparrow_partitions +mysql_declare_plugin_end; diff --git a/storage/sparrow/handler/plugin.h b/storage/sparrow/handler/plugin.h new file mode 100644 index 000000000000..47565e91a42a --- /dev/null +++ b/storage/sparrow/handler/plugin.h @@ -0,0 +1,214 @@ +/* + Sparrow plugin. +*/ + +#ifndef _handler_plugin_h_ +#define _handler_plugin_h_ + +//#include +#include "sql/query_options.h" // For mysqld options. +#include "sql/sql_plugin.h" +#include "mysys_err.h" + +#define SPARROW_ENGINE_NAME "SPARROW" +#define SPARROW_ENGINE_DESC "MySQL Storage Engine for InfoVista" +#define SPARROW_AUTH "InfoVista S.A." 
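The storage engine descriptor above passes sparrow_status_variables_export through the status_vars slot. As a rough sketch only (the counter choice, array name and SHOW names are illustrative, not taken from the Sparrow sources), such an export table maps SHOW STATUS names onto counters like the ones kept in the SparrowStatus singleton declared later in this header:

    #include <mysql/plugin.h>  // SHOW_VAR, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL

    // Each entry exposes one counter; an array of this shape is what the
    // status_vars slot of the plugin descriptor expects.
    static SHOW_VAR sparrow_status_sketch[] = {
      {"Sparrow_api_connections",
       (char*) &Sparrow::SparrowStatus::get().apiConnections_,
       SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
      {"Sparrow_io_read_bytes",
       (char*) &Sparrow::SparrowStatus::get().ioReadBytes_,
       SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
      {nullptr, nullptr, SHOW_LONG, SHOW_SCOPE_GLOBAL}  // terminator
    };

With an export table of this shape registered, SHOW GLOBAL STATUS LIKE 'Sparrow_%' would report the listed counters.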
+ +#define SPARROW_IS_TABLES_NAME "SPARROW_TABLES" +#define SPARROW_IS_TABLES_DESC "Information about Sparrow tables" + +#define SPARROW_IS_COLUMNS_NAME "SPARROW_COLUMNS" +#define SPARROW_IS_COLUMNS_DESC "Information about columns in Sparrow tables" + +#define SPARROW_IS_INDEXES_NAME "SPARROW_INDEXES" +#define SPARROW_IS_INDEXES_DESC "Information about indexes on Sparrow tables" + +#define SPARROW_IS_ALTERATIONS_NAME "SPARROW_ALTERATIONS" +#define SPARROW_IS_ALTERATIONS_DESC "Information about on going alterations on Sparrow tables" + +#define SPARROW_IS_PARTITIONS_NAME "SPARROW_PARTITIONS" +#define SPARROW_IS_PARTITIONS_DESC "Information about partitions in Sparrow tables" + +extern uint sparrow_open_files; +extern uint sparrow_cache_block_size, sparrow_small_read_block_size, sparrow_medium_read_block_size, sparrow_large_read_block_size; +extern uint sparrow_write_block_size, sparrow_transfer_block_size; +extern int sparrow_socket_sndbuf_size, sparrow_socket_rcvbuf_size; +extern uint sparrow_default_time_period; +extern uint sparrow_default_max_lifetime; +extern uint sparrow_index_cost_percentage; +extern uint64_t sparrow_max_disk_size; +extern bool sparrow_async_io; +extern bool sparrow_coalescing, sparrow_purge_constantly; +extern uint64_t sparrow_purge_security_margin; +extern uint sparrow_max_flush_threads, sparrow_max_worker_threads, sparrow_max_writer_threads, sparrow_max_dns_worker_threads, sparrow_max_alter_threads, sparrow_max_coalescing_threads, sparrow_max_api_worker_threads; +extern uint sparrow_flush_interval; +extern uint64_t sparrow_max_tuple_buffer_size; +extern uint64_t sparrow_default_string_optimization_size; +extern uint64_t sparrow_direct_insertion_threshold; +extern uint sparrow_tuple_buffer_threshold; +extern uint sparrow_listener_port; +extern char* sparrow_listener_address; +extern uint sparrow_max_connections; +extern ulong sparrow_incompatible_table; +extern uint64_t sparrow_cache0_size, sparrow_cache1_size, sparrow_cache2_size, sparrow_cache3_size; +extern uint sparrow_disk_sector_size; +extern uint64_t sparrow_max_dns_cache_size; +extern uint sparrow_dns_timeout; +extern uint sparrow_dns_retries; +extern char* sparrow_filesystems; +extern char* sparrow_coalescing_filesystems; +extern bool sparrow_column_optimisation; +extern uint sparrow_column_optimisation_lvl; +extern bool sparrow_quick_shutdown; +extern bool sparrow_auto_partition_repair; +extern bool sparrow_disable_purge; +extern bool sparrow_log_purge_activity; + +extern const uint32_t SPARROW_VERSION; + +namespace Sparrow { + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// SparrowStatus +////////////////////////////////////////////////////////////////////////////////////////////////////// + +class SparrowStatus { +private: + + static SparrowStatus status_; + +public: + + // API stats. 
+ volatile uint32_t apiActiveConnections_; + volatile uint64_t apiConnections_; + volatile uint64_t apiRequests_; + volatile uint64_t apiInputBytes_; + volatile uint64_t apiInputUncompressedBytes_; + volatile uint64_t apiResponses_; + volatile uint64_t apiOutputBytes_; + volatile uint64_t apiOutputUncompressedBytes_; + + // IO stats + volatile uint64_t ioReadBytes_; + volatile uint64_t ioWrittenBytes_; + volatile uint64_t ioReads_; + volatile uint64_t ioWrites_; + volatile uint64_t ioOpens_; + volatile uint64_t ioCloses_; + volatile uint32_t ioBuffers_; + volatile uint64_t ioBufferSize_; + volatile uint64_t ioNbSmall_; + volatile uint64_t ioNbMedium_; + volatile uint64_t ioNbLarge_; + + // Sizes. + volatile uint64_t tupleBufferSize_; + volatile uint64_t totalSize_; + volatile uint64_t freeDiskSpace_; + + // Stats for file cache (see CacheStat class). + volatile uint64_t fileCacheAcquires_; + volatile uint64_t fileCacheReleases_; + volatile uint64_t fileCacheMisses_; + volatile uint64_t fileCacheHits_; + volatile uint64_t fileCacheSlowHits_; + + // Stats for block cache (see CacheStat class). + volatile uint64_t blockCacheAcquires_; + volatile uint64_t blockCacheReleases_; + volatile uint64_t blockCacheMisses_; + volatile uint64_t blockCacheHits_; + volatile uint64_t blockCacheSlowHits_; + volatile uint64_t blockCacheLvl0Hits_; + volatile uint64_t blockCacheLvl1Hits_; + volatile uint64_t blockCacheLvl2Hits_; + volatile uint64_t blockCacheLvl3Hits_; + volatile uint64_t blockCacheLvl0SlowHits_; + volatile uint64_t blockCacheLvl1SlowHits_; + volatile uint64_t blockCacheLvl2SlowHits_; + volatile uint64_t blockCacheLvl3SlowHits_; + volatile uint64_t blockCacheLvl0Misses_; + volatile uint64_t blockCacheLvl1Misses_; + volatile uint64_t blockCacheLvl2Misses_; + volatile uint64_t blockCacheLvl3Misses_; + volatile uint32_t blockCacheLvl0FillRatio_; + volatile uint32_t blockCacheLvl1FillRatio_; + volatile uint32_t blockCacheLvl2FillRatio_; + volatile uint32_t blockCacheLvl3FillRatio_; + + // Stats for DNS. + volatile uint32_t dnsCaches_; + volatile uint64_t dnsCacheAcquires_; + volatile uint64_t dnsCacheHits_; + volatile uint64_t dnsCacheEvictions_; + volatile uint64_t dnsCacheEntries_; + volatile uint64_t dnsCachePendingEntries_; + volatile uint64_t dnsCacheSize_; + volatile uint64_t dnsRequests_; + volatile uint64_t dnsRetries_; + volatile uint64_t dnsResponses_; + volatile uint64_t dnsDiscardedResponses1_; + volatile uint64_t dnsDiscardedResponses2_; + volatile uint64_t dnsDiscardedResponses3_; + volatile uint64_t dnsDiscardedResponses4_; + volatile uint64_t dnsNoAnswer_; + volatile uint64_t dnsErrorsDecoding_; + volatile uint64_t dnsErrorsUnknown_; + volatile uint64_t dnsErrorsFormat_; + volatile uint64_t dnsErrorsFailure_; + volatile uint64_t dnsErrorsName_; + volatile uint64_t dnsErrorsNotImplemented_; + volatile uint64_t dnsErrorsRefused_; + volatile uint64_t dnsErrorsYXDomain_; + volatile uint64_t dnsErrorsYXRRSet_; + volatile uint64_t dnsErrorsNXRRSet_; + volatile uint64_t dnsErrorsNotAuth_; + volatile uint64_t dnsErrorsNotZone_; + + // DDL serial counter. + volatile uint32_t ddlSerial_; + + // Thread counts. 
+ volatile uint32_t workerThreads_; + volatile uint32_t flushThreads_; + volatile uint32_t dnsWorkerThreads_; + volatile uint32_t writerThreads_; + volatile uint32_t apiWorkerThreads_; + volatile uint32_t alterThreads_; + volatile uint32_t coalescingThreads_; + + volatile uint64_t coalescingIndexTaskProcessed_; + volatile uint64_t coalescingMainTaskProcessed_; + + // Flushs + volatile uint64_t flushWait_; + volatile uint64_t flushForced_; + + // Pending jobs + volatile uint32_t tasksPendingFlushTasks_; + volatile uint32_t tasksPendingFlushAllTasks_; + volatile uint32_t tasksPendingFlushJobs_; + volatile uint32_t tasksPendingStringJobs_; + volatile uint32_t tasksPendingIndexJobs_; + volatile uint32_t tasksPendingWriteJobs_; + volatile uint32_t tasksPendingCoalescingMainTasks_; + volatile uint32_t tasksPendingCoalescingIndexTasks_; + volatile uint32_t tasksPendingDnsTasks_; + + // TODO error stats + +public: + + SparrowStatus() { + memset(this, 0, sizeof(*this)); + } + + static SparrowStatus& get() { + return status_; + } +}; + +} + +#endif /* #ifndef _handler_plugin_h_ */ diff --git a/storage/sparrow/sparrow.ini b/storage/sparrow/sparrow.ini new file mode 100644 index 000000000000..f35c3ad87a0b --- /dev/null +++ b/storage/sparrow/sparrow.ini @@ -0,0 +1,300 @@ + # MySQL Server Instance Configuration File + # ---------------------------------------------------------------------- + # Generated by the MySQL Server Instance Configuration Wizard + # + # + # Installation Instructions + # ---------------------------------------------------------------------- + # + # On Linux you can copy this file to /etc/my.cnf to set global options, + # mysql-data-dir/my.cnf to set server-specific options + # (@localstatedir@ for this installation) or to + # ~/.my.cnf to set user-specific options. + # + # On Windows you should keep this file in the installation directory + # of your server (e.g. C:\Program Files\MySQL\MySQL Server X.Y). To + # make sure the server reads the config file use the startup option + # "--defaults-file". + # + # To run run the server from the command line, execute this in a + # command line shell, e.g. + # mysqld --defaults-file="C:\Program Files\MySQL\MySQL Server X.Y\my.ini" + # + # To install the server as a Windows service manually, execute this in a + # command line shell, e.g. + # mysqld --install MySQLXY --defaults-file="C:\Program Files\MySQL\MySQL Server X.Y\my.ini" + # + # And then execute this in a command line shell to start the server, e.g. + # net start MySQLXY + # + # + # Guildlines for editing this file + # ---------------------------------------------------------------------- + # + # In this file, you can use all long options that the program supports. + # If you want to know the options a program supports, start the program + # with the "--help" option. + # + # More detailed information about the individual options can also be + # found in the manual. + # + # + # CLIENT SECTION + # ---------------------------------------------------------------------- + # + # The following options will be read by MySQL client applications. + # Note that only client applications shipped by MySQL are guaranteed + # to read this section. If you want your own MySQL client program to + # honor these values, you need to specify it as an option during the + # MySQL client library initialization. 
+ # + [client] + + port=3306 + + [mysql] + + default-character-set=latin1 + + + # SERVER SECTION + # ---------------------------------------------------------------------- + # + # The following options will be read by the MySQL Server. Make sure that + # you have installed the server correctly (see above) so it reads this + # file. + # + [mysqld] + + #The TCP/IP Port the MySQL Server will listen on + port=3306 + + # + max_allowed_packet=1G + + + #Path to installation directory. All paths are usually resolved relative to this. + basedir="/opt/InfoVista/5ViewOC" + + #Path to the database root + datadir="/opt/InfoVista/5ViewOC/mysql/data" + + # The default character set that will be used when a new schema or table is + # created and no character set is defined + default-character-set=latin1 + + # The default storage engine that will be used when create new tables when + default-storage-engine=myisam + + # Set the SQL mode to strict + sql-mode="STRICT_TRANS_TABLES,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION" + + # The maximum amount of concurrent sessions the MySQL server will + # allow. One of these connections will be reserved for a user with + # SUPER privileges to allow the administrator to login even if the + # connection limit has been reached. + max_connections=100 + + # Query cache is used to cache SELECT results and later return them + # without actual executing the same query once again. Having the query + # cache enabled may result in significant speed improvements, if your + # have a lot of identical queries and rarely changing tables. See the + # "Qcache_lowmem_prunes" status variable to check if the current value + # is high enough for your load. + # Note: In case your tables change very often or if your queries are + # textually different every time, the query cache may result in a + # slowdown instead of a performance improvement. + query_cache_size=0 + + # The number of open tables for all threads. Increasing this value + # increases the number of file descriptors that mysqld requires. + # Therefore you have to make sure to set the amount of open files + # allowed to at least 4096 in the variable "open-files-limit" in + # section [mysqld_safe] + table_cache=256 + + # Maximum size for internal (in-memory) temporary tables. If a table + # grows larger than this value, it is automatically converted to disk + # based table This limitation is for a single table. There can be many + # of them. + tmp_table_size=18M + + + # How many threads we should keep in a cache for reuse. When a client + # disconnects, the client's threads are put in the cache if there aren't + # more than thread_cache_size threads from before. This greatly reduces + # the amount of thread creations needed if you have a lot of new + # connections. (Normally this doesn't give a notable performance + # improvement if you have a good thread implementation.) + thread_cache_size=8 + + #*** MyISAM Specific options + + # The maximum size of the temporary file MySQL is allowed to use while + # recreating the index (during REPAIR, ALTER TABLE or LOAD DATA INFILE. + # If the file-size would be bigger than this, the index will be created + # through the key cache (which is slower). + myisam_max_sort_file_size=100G + + # If the temporary file used for fast index creation would be bigger + # than using the key cache by the amount specified here, then prefer the + # key cache method. This is mainly used to force long character keys in + # large tables to use the slower key cache method to create the index. 
+ myisam_max_extra_sort_file_size=100G + + # If the temporary file used for fast index creation would be bigger + # than using the key cache by the amount specified here, then prefer the + # key cache method. This is mainly used to force long character keys in + # large tables to use the slower key cache method to create the index. + myisam_sort_buffer_size=35M + + # Size of the Key Buffer, used to cache index blocks for MyISAM tables. + # Do not set it larger than 30% of your available memory, as some memory + # is also required by the OS to cache rows. Even if you're not using + # MyISAM tables, you should still set it to 8-64M as it will also be + # used for internal temporary disk tables. + key_buffer_size=128M + + # Size of the buffer used for doing full table scans of MyISAM tables. + # Allocated per thread, if a full scan is needed. + read_buffer_size=64K + read_rnd_buffer_size=256K + + # This buffer is allocated when MySQL needs to rebuild the index in + # REPAIR, OPTIMZE, ALTER table statements as well as in LOAD DATA INFILE + # into an empty table. It is allocated per thread so be careful with + # large settings. + sort_buffer_size=256K + + # ---------------------------------------------------------------------- + # InnoDB options. + # ---------------------------------------------------------------------- + + innodb_data_home_dir="/opt/InfoVista/5ViewOC/mysql/data" + innodb_data_file_path=ibdata1:10M:autoextend + innodb_log_group_home_dir="/opt/InfoVista/5ViewOC/mysql/data" + innodb_buffer_pool_size=16M + innodb_additional_mem_pool_size=2M + innodb_log_file_size=5M + innodb_log_buffer_size=8M + innodb_flush_log_at_trx_commit=1 + innodb_lock_wait_timeout=50 + + # ---------------------------------------------------------------------- + # Sparrow options. + # ---------------------------------------------------------------------- + + # Maximum number of files opened simultaneously in Sparrow. + sparrow_open_files=500 + + # Default maximum lifetime of data in a table, in days. When this lifetime is reached, older data are purged. + sparrow_default_max_lifetime=365 + + # Percentage applied to the number of records in range, in order to favor index scans over table scans. Default is 50%: twice less records in range than computed. + sparrow_index_cost_percentage=50 + + # Maximum disk size used by all Sparrow tables. If 0, all disk space except a security margin of 100MB may be used. + sparrow_max_disk_size=100G + + # Block size used when reading data files (one block per system call). + sparrow_data_read_block_size=4K + + # Block size used when reading index files (one block per system call). + sparrow_index_read_block_size=1M + + # Block size used when reading master and status files (one block per system call). + sparrow_read_block_size=64K + + # Block size used when writing files (one block per system call). + sparrow_write_block_size=16M + + # Block size used when reading data from the network. + sparrow_transfer_block_size=1M + + # Size of the socket buffer used to send data. + sparrow_socket_sndbuf_size=2048 + + # Size of the socket buffer used to receive data. + sparrow_socket_rcvbuf_size=1M + + # Block size used when caching data. + sparrow_cache_block_size=8K + + # Size of level 0 cache. This cache level contains data read recently but not yet used. + sparrow_cache0_size=256M + + # Size of level 1 cache. This cache level contains meta data and index records from recent queries. + sparrow_cache1_size=256M + + # Size of level 2 cache. 
This cache level contains data records from recent queries. + sparrow_cache2_size=256M + + # Size of level 3 cache. This cache level contains data records from recent inserts. + sparrow_cache3_size=256M + + # When no WHERE condition is specified for the timestamp, use this default time period (in seconds) + # before current time. + # For tests: if set to 0 (allowed only in debug mode), all time partitions are selected. + sparrow_default_time_period=86400 + + # Number of worker threads. + sparrow_worker_threads=2 + + # Number of writer threads. + sparrow_writer_threads=1 + + # Number of worker threads for DNS resolution. + sparrow_dns_worker_threads=1 + + # Number of threads used for online table modifications. + sparrow_alter_threads=1 + + # If false, partition coalescing is disabled. It is enabled by default. + sparrow_coalescing=1 + + # Number of threads used for partition coalescing. + sparrow_coalescing_threads=1 + + # Maximum size of the tuple buffer. When this buffer is full, insertions are blocked until a flush occurs. + sparrow_max_tuple_buffer_size=64M + + # Threshold, in percentage of sparrow_max_tuple_buffer_size, above which data are flushed to disk. + sparrow_tuple_buffer_threshold=50 + + # Interval, in seconds, between data flushes. Data may be written more often if the maximum number of + # buffered tuples is reached before this time interval elapses. + # For tests: if set to 0 (allowed only in debug mode), data are never flushed. + sparrow_flush_interval=300 + + # Maximum size used when generating the tree holding index values. There is one tree in each data and index file. If set to 0, there is no maximum size. + sparrow_index_tree_size=0 + + # TCP port on which Sparrow binds its listener socket. + sparrow_listener_port=11000 + + # Local address on which Sparrow binds its listener socket. + sparrow_listener_address= + + # Maximum number of simultaneous connections on Sparrow. + sparrow_max_connections=50 + + # Action taken when an existing table is detected upon initialization, but with incompatible columns. + # If drop, the existing table is dropped. + # If rename, the existing table is renamed to xxx_YYYYMMDDHHmmSS, where xxx is the name of the Sparrow table and + # YYYYMMDDHHmmSS the timestamp of the detection. + sparrow_incompatible_table=drop + + # Unix only: disk sector size of the partition where the database resides. + sparrow_disk_sector_size=512 + + # Maximum size of the DNS cache. + sparrow_max_dns_cache_size=64M + + # Default DNS query timeout, in milliseconds. + sparrow_dns_timeout=200 + + # Default maximum number of retries when a query times out. If multiple servers are available for the query, the retry is performed on the next server. + sparrow_dns_retries=2 + + # Additional file systems for Sparrow data and index files. 
+ sparrow_filesystems= diff --git a/storage/sparrow/udf/CMakeLists.txt b/storage/sparrow/udf/CMakeLists.txt new file mode 100644 index 000000000000..3ebf93ff1ed1 --- /dev/null +++ b/storage/sparrow/udf/CMakeLists.txt @@ -0,0 +1,18 @@ +SET(SPARROWUDF_SOURCES + udf.h + udf.cc + operator.h + operator.cc + udfargument.h + udfargument.cc) + +ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_DLOPEN) +ADD_LIBRARY(sparrowudf MODULE ${SPARROWUDF_SOURCES}) + +# sparrowudf depends on strings +IF(WIN32) + IF(MSVC) + SET_TARGET_PROPERTIES(sparrowudf PROPERTIES LINK_FLAGS "/DEF:${CMAKE_CURRENT_SOURCE_DIR}/udf.def") + ENDIF() + TARGET_LINK_LIBRARIES(sparrowudf strings) +ENDIF() diff --git a/storage/sparrow/udf/operator.cc b/storage/sparrow/udf/operator.cc new file mode 100644 index 000000000000..4fb77606f549 --- /dev/null +++ b/storage/sparrow/udf/operator.cc @@ -0,0 +1 @@ +#include "operator.h" diff --git a/storage/sparrow/udf/operator.h b/storage/sparrow/udf/operator.h new file mode 100644 index 000000000000..ddd3ad5bc96a --- /dev/null +++ b/storage/sparrow/udf/operator.h @@ -0,0 +1,419 @@ +#ifndef _operator_h_ +#define _operator_h_ + +#include "udfargument.h" + +// comment out to log values in directory : LOG_REPOSITORY_OPERATOR +//#define LOG_OPERATOR + +#ifdef LOG_OPERATOR +#ifndef LOG_REPOSITORY_OPERATOR +// directory where files are written +#define LOG_REPOSITORY_OPERATOR "D:/debug" +#endif LOG_REPOSITORY_OPERATOR +#endif //#ifdef LOG_OPERATOR + +//////////////////////////////////////////////////////////////////////////////// +// OperatorData +//////////////////////////////////////////////////////////////////////////////// + +template +class OperatorData +{ +public: + + // constructor + OperatorData(); + + // virtual destructor + virtual ~OperatorData(); + + // return determiner + const T& getDeterminer() const; + + // return indicator + const U& getIndicator() const; + + // empty the structure + void empty(); + // tell whether structure is empty or not (has been used to store values) + bool isEmpty() const; + + // update determiner and indicator if determiner is greater than the stored one + void updateIfGreater(UDF_ARGS* args); + +#ifdef LOG_OPERATOR + // log current determiner and indicator + void logContent(); +#endif //#ifdef LOG_OPERATOR + +protected: + // the maximum reached value of the determiner + T determiner_; + + // the indicator value that must be finally returned + U indicator_; + + // flag indicating whether the structure has been used at least one time and so contains valid values + bool empty_; + +#ifdef LOG_OPERATOR + // create a log file + void createLogFile(const char* szLogFilePath); + // close a log file + void closeLogFile(); + // write data type + static void writeType(FILE * fp, enum Item_result arg_type); + // write determiner + static void writeDeterminer(FILE * fp, const T& determiner); + // write indicator + static void writeIndicator(FILE * fp, const U& indicator); + // write determiner and indicator + static void writeValues(FILE * fp, const T& determiner, const U& indicator); + // log input + static void logInput(FILE * fp, const T& determiner, const U& indicator); + // file handler + FILE * fp_; +#endif //#ifdef LOG_OPERATOR +}; + + + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + + + +//////////////////////////////////////////////////////////////////////////////// +// OperatorData 
+//////////////////////////////////////////////////////////////////////////////// + +// constructor +template +inline OperatorData::OperatorData() +{ + empty_ = true; +#ifdef LOG_OPERATOR + // create log file + char filepath[1024]; + clock_t mytime = clock(); + sprintf(filepath,"%s/udf_operator_%ld.log",LOG_REPOSITORY_OPERATOR,mytime); + OperatorData::createLogFile(filepath); +#endif //#ifdef LOG_OPERATOR +} + +// destructor +template +inline OperatorData::~OperatorData() +{ +#ifdef LOG_OPERATOR + closeLogFile(); +#endif //#ifdef LOG_OPERATOR +} + +// return determiner +template +inline const T& OperatorData::getDeterminer() const +{ + return determiner_; +} + +// return indicator +template +inline const U& OperatorData::getIndicator() const +{ + return indicator_; +} + +// empty the structure +template +inline void OperatorData::empty() +{ + empty_ = true; +#ifdef LOG_OPERATOR + // update log file + if (fp_ == NULL) return; + + clock_t mytime = clock(); + fprintf(fp_,"\n\n===== EMPTY =====\n"); + fprintf(fp_,"\n%ld\n",mytime); + fflush(fp_); +#endif //#ifdef LOG_OPERATOR +} + +// tell whether structure is empty or not (has been used to store values) +template +inline bool OperatorData::isEmpty() const +{ + return empty_; +} + +// update determiner and indicator if determiner is greater than the stored one +template +inline void OperatorData::updateIfGreater(UDF_ARGS* args) +{ + // Test arguments + if (args == NULL) return; + + // If no new determiner value, can't do anything + if (args->args[1] == NULL) return; + + // Retrieves new determiner value + T newDeterminer(args->args[1],args->lengths[1]); + +#ifdef LOG_OPERATOR + U logIndicator(args->args[0],args->lengths[0]); + OperatorData::logInput(fp_,newDeterminer,logIndicator); +#endif //#ifdef LOG_OPERATOR + + // If first pass, store determiner and indicator + if(empty_) { + + // Store determiner and indicator + determiner_ = newDeterminer; + indicator_.setValue(args->args[0],args->lengths[0]); + empty_ = false; + +#ifdef LOG_OPERATOR + logContent(); +#endif //#ifdef LOG_OPERATOR + + } else { + // Test if new determiner is greater than the stored one + //if (newDeterminer.getValue() > determiner_.getValue()) { + if (newDeterminer > determiner_) { + + // Store determiner and indicator + determiner_ = newDeterminer; + indicator_.setValue(args->args[0],args->lengths[0]); + +#ifdef LOG_OPERATOR + logContent(); +#endif //#ifdef LOG_OPERATOR + + } + } +} + +// create a log file +#ifdef LOG_OPERATOR +template +inline void OperatorData::createLogFile(const char* szLogFilePath) +{ + fp_ = fopen(szLogFilePath,"w"); + + if (fp_ == NULL) return; + + clock_t mytime = clock(); + fprintf(fp_,"\n\n===== CREATE =====\n"); + fprintf(fp_,"%ld\n",mytime); + + fflush(fp_); +} + +// close a log file +template +inline void OperatorData::closeLogFile() +{ + if (fp_ == NULL) return; + + fclose(fp_); +} + +// write data type +template +inline void OperatorData::writeType(FILE * fp, enum Item_result argType) +{ + if (fp == NULL) return; + + switch(argType) { + case STRING_RESULT: + fprintf(fp,"\t STRING_RESULT"); + break; + case REAL_RESULT: + fprintf(fp,"\t REAL_RESULT"); + break; + case INT_RESULT: + fprintf(fp,"\t INT_RESULT"); + break; + case ROW_RESULT: + fprintf(fp,"\t ROW_RESULT"); + break; + case DECIMAL_RESULT: + fprintf(fp,"\t DECIMAL_RESULT"); + break; + default: + break; + } + fflush(fp); +} + +// write determiner +template +inline void OperatorData::writeDeterminer(FILE * fp, const T & input) +{ + if (fp == NULL) return; + + fprintf(fp,"\tdeterminer: 
"); + switch(input.getType()) { + case STRING_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %s",input.getValue()); + } + fprintf(fp,"\t STRING_RESULT"); + break; + case REAL_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %lf",input.getValue()); + } + fprintf(fp,"\t REAL_RESULT"); + break; + case INT_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %lld",input.getValue()); + } + fprintf(fp,"\t INT_RESULT"); + break; + case ROW_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t ROW_RESULT"); + } + break; + case DECIMAL_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %s",input.getValue()); + } + fprintf(fp,"\t DECIMAL_RESULT"); + break; + default: + break; + } + fflush(fp); +} + +// write indicator +template +inline void OperatorData::writeIndicator(FILE * fp, const U & input) +{ + if (fp == NULL) return; + + fprintf(fp,"\tindicator: "); + switch(input.getType()) { + case STRING_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %s",input.getValue()); + } + fprintf(fp,"\t STRING_RESULT"); + break; + case REAL_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %lf",input.getValue()); + } + fprintf(fp,"\t REAL_RESULT"); + break; + case INT_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %lld",input.getValue()); + } + fprintf(fp,"\t INT_RESULT"); + break; + case ROW_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t ROW_RESULT"); + } + break; + case DECIMAL_RESULT: + if (input.isNull()) + { + fprintf(fp,"\t null"); + } + else + { + fprintf(fp,"\t %s",input.getValue()); + } + fprintf(fp,"\t DECIMAL_RESULT"); + break; + default: + break; + } + fflush(fp); +} + +// write determiner and indicator +template +inline void OperatorData::writeValues(FILE * fp, const T& determiner, const U& indicator) +{ + if (fp == NULL) return; + + OperatorData::writeDeterminer(fp, determiner); + OperatorData::writeIndicator(fp, indicator); +} + +// log input +template +inline void OperatorData::logInput(FILE * fp, const T& determiner, const U& indicator) +{ + if (fp == NULL) return; + + fprintf(fp,"\nInput"); + OperatorData::writeValues(fp, determiner, indicator); +} + +// log current determiner and indicator +template +inline void OperatorData::logContent() +{ + if (fp_ == NULL) return; + + fprintf(fp_,"\nStored values"); + OperatorData::writeValues(fp_, determiner_, indicator_); +} +#endif //#ifdef LOG_OPERATOR + +#endif //#define _operator_h_ diff --git a/storage/sparrow/udf/udf.cc b/storage/sparrow/udf/udf.cc new file mode 100644 index 000000000000..84f78df0ff2d --- /dev/null +++ b/storage/sparrow/udf/udf.cc @@ -0,0 +1,594 @@ +/* +** file of UDF (user definable functions) that are dynamicly loaded +** into the standard mysqld core. +** +** The functions name, type and shared library is saved in the new system +** table 'func'. To be able to create new functions one must have write +** privilege for the database 'mysql'. If one starts MySQL with +** --skip-grant, then UDF initialization will also be skipped. +** +** Syntax for the new commands are: +** create function returns {string|real|integer} +** soname +** drop function +** +** Each defined function may have a xxxx_init function and a xxxx_deinit +** function. 
The init function should alloc memory for the function +** and tell the main function about the max length of the result +** (for string functions), number of decimals (for double functions) and +** if the result may be a null value. +** +** If a function sets the 'error' argument to 1 the function will not be +** called anymore and mysqld will return NULL for all calls to this copy +** of the function. +** +** All strings arguments to functions are given as string pointer + length +** to allow handling of binary data. +** Remember that all functions must be thread safe. This means that one is not +** allowed to alloc any global or static variables that changes! +** If one needs memory one should alloc this in the init function and free +** this on the __deinit function. +** +** Note that the init and __deinit functions are only called once per +** SQL statement while the value function may be called many times +** +** Function 'value_at' returns an indicator value corresponding to the maximum value of the given determiner. + +** Function 'maxt' returns a timestamp corresponding to the maximum value of the given determiner. +** +** A dynamically loadable file should be compiled shared. +** (something like: gcc -shared -o my_func.so myfunc.cc). +** You can easily get all switches right by doing: +** cd sql ; make udf_example.o +** Take the compile line that make writes, remove the '-c' near the end of +** the line and add -shared -o udf_example.so to the end of the compile line. +** The resulting library (udf_example.so) should be copied to some dir +** searched by ld. (/usr/lib ?) +** If you are using gcc, then you should be able to create the udf_example.so +** by simply doing 'make udf_example.so'. +** +** After the library is made one must notify mysqld about the new +** functions with the commands: +** +** CREATE AGGREGATE FUNCTION value_at RETURNS REAL SONAME "udf_example.so"; +** CREATE AGGREGATE FUNCTION maxt RETURNS STRING SONAME "udf_example.so"; +** +** After this the functions will work exactly like native MySQL functions. +** Functions should be created only once. +** +** The functions can be deleted by: +** +** DROP FUNCTION value_at; +** DROP FUNCTION maxt; +** +** The CREATE FUNCTION and DROP FUNCTION update the func@mysql table. All +** Active function will be reloaded on every restart of server +** (if --skip-grant-tables is not given) +** +** If you ge problems with undefined symbols when loading the shared +** library, you should verify that mysqld is compiled with the -rdynamic +** option. +** +** If you can't get AGGREGATES to work, check that you have the column +** 'type' in the mysql.func table. If not, run 'mysql_upgrade'. 
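+** Illustrative usage once the aggregate functions are registered (the table and
+** column names here are hypothetical, they are not part of this patch):
+**
+**   SELECT value_at(throughput, ts), maxt(ts, throughput), percentile(throughput, 95)
+**   FROM metrics GROUP BY device;
+**
+** value_at() returns the 'throughput' observed at the largest 'ts' of each group,
+** maxt() returns the 'ts' at which 'throughput' was largest, and percentile()
+** collects the group's values and returns the element at index (n-1)*95/100
+** (integer truncation) after a partial sort with std::nth_element, i.e. an
+** approximate 95th percentile without interpolation.
+** timestampToLongLong() below packs 'YYYY-MM-DD HH:MM:SS' into the decimal-coded
+** integer YYYYMMDDHHMMSS (for example '2025-01-14 13:35:29' becomes 20250114133529);
+** the get*() helpers unpack it again with integer division and modulo.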
+** +*/ + +#include "udf.h" + +#include +#include +#include + +#include + +/*#if defined(MYSQL_SERVER) +#include +#else +// when compiled as standalone +#include +#define strmov(a,b) stpcpy(a,b) +#endif*/ + +#include "operator.h" + +#ifdef HAVE_DLOPEN + +// Method to convert a sql timestamp YYYY-MM-DD HH:MM:SS to YYYYMMDDHHMMSS +longlong timestampToLongLong(const char* timestamp); +// Methods to parse a sql timestamp YYYYMMDDHHMMSS +longlong getYear(long long sqltimestamp); +longlong getMonth(long long sqltimestamp); +longlong getDay(long long sqltimestamp); +longlong getHour(long long sqltimestamp); +longlong getMinute(long long sqltimestamp); +longlong getSecond(long long sqltimestamp); +longlong getDate(long long sqltimestamp); +longlong getTime(long long sqltimestamp); + +typedef OperatorData ValueAtData; +typedef OperatorData MaxtData; + +/* +** Syntax for the new aggregate commands are: +** create aggregate function returns {string|real|integer} +** soname +** +** Syntax for percentile: percentile( t.indicator, t.determiner ) +** with t.indicator=double, t.determiner=double +*/ + +class PercentileData { +public: + PercentileData() : empty_(true), percent_(0.0) {;} + + void clear() { data_.clear(); empty_=true; percent_=0.0; } + void add(UDF_ARGS* args, char* error); + double compute(bool& isNull, char* error); + +public: + bool empty_; + double percent_; + std::vector data_; +}; + +void PercentileData::add(UDF_ARGS* args, char* error) { + // Test arguments + if (args == NULL) { + *error = 1; return; + } + + if (args->args[1] == NULL) { + *error = 1; return; + } + + UdfArgumentReal arg(args->args[1],args->lengths[1]); + if (arg.isNull()) { + *error = 1; return; + } + + double value = arg.getValue(); + if (value < 0.0 || value > 100.0) { + *error = 1; return; + } + + if ( empty_ ) { + percent_ = value; + empty_ = false; + } else if (percent_ != value) { + *error = 1; return; + } + + if (args->args[0] != NULL) { + UdfArgumentReal arg(args->args[0],args->lengths[0]); + if (!arg.isNull()) { + double value = arg.getValue(); + data_.push_back(value); + } + } +} + +double PercentileData::compute(bool& isNull, char* error) { + if (empty_ || data_.empty()) { + *error = 1; + isNull = true; + return 0; + } + isNull = false; + int position = static_cast((data_.size()-1)*percent_/100); + std::nth_element(data_.begin(), data_.begin()+position, data_.end()); + double percentile=data_[position]; + return percentile; +} + + +// percentile Aggregate Function. + +// Allocate memory and initialize parameters +bool +percentile_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) +{ + PercentileData* data = NULL; + + // Test argument count + if (args->arg_count != 2) + { + strcpy( message, "wrong number of arguments: percentile() requires two arguments" ); + return 1; + } + + // Allocate working structure + data = new (std::nothrow)PercentileData; + if (data == NULL) + { + strcpy(message,"Couldn't allocate memory"); + return 1; + } + + // Force type of arguments + args->arg_type[0] = REAL_RESULT; + args->arg_type[1] = REAL_RESULT; + + initid->maybe_null = 1; // The result may be null + initid->decimals = 4; // We want 4 decimals in the result + initid->max_length = 20; // 6 digits + . 
+ 10 decimals + + // Initialize flag + data->clear(); + + // Store working structure + initid->ptr = (char*)data; + + return 0; +} + +// Deallocate memory +void +percentile_deinit( UDF_INIT* initid ) +{ + // Deallocate working structure + void *void_ptr= initid->ptr; + PercentileData *data = static_cast(void_ptr); + delete data; +} + +// This is needed to get things to work in MySQL 4.1.1 and above +void +percentile_clear(UDF_INIT* initid, [[maybe_unused]] char* is_null, + [[maybe_unused]] char* error) +{ + PercentileData *data = (PercentileData*)(initid->ptr); + + // Initialize flag + data->clear(); +} + +// Treats a new row +void +percentile_add(UDF_INIT* initid, UDF_ARGS* args, + [[maybe_unused]] char* is_null, + [[maybe_unused]] char* error) +{ + PercentileData *data = (PercentileData*)(initid->ptr); + + data->add(args, error); +} + +// This is only for MySQL 4.0 compatibility +void +percentile_reset(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* error) +{ + percentile_clear(initid, is_null, error); + percentile_add(initid, args, is_null, error); +} + +// Return the indicator value, if any +double +percentile(UDF_INIT* initid, [[maybe_unused]] UDF_ARGS* args, + char* is_null, [[maybe_unused]] char* error) +{ + PercentileData *data = (PercentileData*)(initid->ptr); + bool isNull = false; + double result = data->compute(isNull, error); + // If nothing happened, return null + if (isNull) { + *is_null = 1; + return 0.0; + } + *is_null = 0; + return result; +} + + + +/* +** Syntax for the new aggregate commands are: +** create aggregate function returns {string|real|integer} +** soname +** +** Syntax for value_at: value_at( t.indicator, t.determiner ) +** with t.indicator=double, t.determiner=double +*/ + +// Value at Aggregate Function. + +// Allocate memory and initialize parameters +bool +value_at_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) +{ + ValueAtData* data = NULL; + + // Test argument count + if (args->arg_count != 2) + { + strcpy(message,"wrong number of arguments: value_at() requires two arguments"); + return 1; + } + + // Don't test input type + /* + if ((args->arg_type[0] != REAL_RESULT) || (args->arg_type[1] != REAL_RESULT) ) + { + strcpy( + message, + "wrong argument type: value_at() requires an REAL and a REAL" + ); + return 1; + }*/ + + // Allocate working structure + data = new (std::nothrow) ValueAtData; + if (data == NULL) + { + strcpy(message,"Couldn't allocate memory"); + return 1; + } + + // Force type of arguments + args->arg_type[0] = data->getIndicator().getType(); + args->arg_type[1] = data->getDeterminer().getType(); + + initid->maybe_null = 1; // The result may be null + initid->decimals = 4; // We want 4 decimals in the result + initid->max_length = 20; // 6 digits + . 
+ 10 decimals + + // Initialize flag + data->empty(); + + // Store working structure + initid->ptr = (char*)data; + + return 0; +} + +// Deallocate memory +void +value_at_deinit( UDF_INIT* initid ) +{ + // Deallocate working structure + void *void_ptr= initid->ptr; + ValueAtData *data = static_cast(void_ptr); + delete data; +} + +// This is needed to get things to work in MySQL 4.1.1 and above +void +value_at_clear(UDF_INIT* initid, [[maybe_unused]] char* is_null, + [[maybe_unused]] char* message) +{ + ValueAtData *data = (ValueAtData*) initid->ptr; + + // Initialize flag + data->empty(); +} + +// Treats a new row +void +value_at_add(UDF_INIT* initid, UDF_ARGS* args, + [[maybe_unused]] char* is_null, + [[maybe_unused]] char* message) +{ + ValueAtData *data = (ValueAtData*) initid->ptr; + data->updateIfGreater(args); +} + +// This is only for MySQL 4.0 compability +void +value_at_reset(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message) +{ + value_at_clear(initid, is_null, message); + value_at_add(initid, args, is_null, message); +} + +// Return the indicator value, if any +double +value_at( UDF_INIT* initid, [[maybe_unused]] UDF_ARGS* args, + char* is_null, [[maybe_unused]] char* error) +{ + ValueAtData *data = (ValueAtData*) initid->ptr; + + // If nothing happened, return null + if (data->isEmpty()) + { + *is_null = 1; + return 0.0; + } + +#ifdef LOG_OPERATOR + data->logContent(); +#endif //#ifdef LOG_OPERATOR + + // Return the indicator value + if (data->getIndicator().isNull()) + { + *is_null = 1; + return 0.0; + } + else + { + *is_null = 0; + return data->getIndicator().getValue(); + } +} + + +/* +** Syntax for the new aggregate commands are: +** create aggregate function returns {string|real|integer} +** soname +** +** Syntax for maxt: value_at( t.indicator, t.determiner ) +** with t.indicator=longlong, t.determiner=double +*/ + + +// maxt Aggregate Function. + +// Allocate memory and initialize parameters +bool +maxt_init( UDF_INIT* initid, UDF_ARGS* args, char* message ) +{ + MaxtData* data = NULL; + + // Test argument count + if (args->arg_count != 2) + { + strcpy(message,"wrong number of arguments: maxt() requires two arguments"); + return 1; + } + + // Don't test input type + /* + if ((args->arg_type[0] != REAL_RESULT) || (args->arg_type[1] != REAL_RESULT) ) + { + strcpy( + message, + "wrong argument type: maxt() requires an REAL and a REAL" + ); + return 1; + }*/ + + // Allocate working structure + data = new (std::nothrow) MaxtData; + if (data == NULL) + { + strcpy(message,"Couldn't allocate memory"); + return 1; + } + + // Force type of arguments + args->arg_type[0] = data->getIndicator().getType(); + args->arg_type[1] = data->getDeterminer().getType(); + + initid->maybe_null = 1; // The result may be null */ + initid->decimals = 4; // We want 4 decimals in the result */ + initid->max_length = 20; // 6 digits + . 
+ 10 decimals + + // Initialize flag + data->empty(); + + // Store working structure + initid->ptr = (char*)data; + + return 0; +} + +// Deallocate memory +void +maxt_deinit( UDF_INIT* initid ) +{ + // Deallocate working structure + void *void_ptr = initid->ptr; + MaxtData *data = static_cast(void_ptr); + delete data; +} + +// This is needed to get things to work in MySQL 4.1.1 and above +void +maxt_clear(UDF_INIT* initid, [[maybe_unused]] char* is_null, + [[maybe_unused]] char* message) +{ + MaxtData *data = (MaxtData*) initid->ptr; + + // Initialize flag + data->empty(); +} + +// Treats a new row +void +maxt_add(UDF_INIT* initid, UDF_ARGS* args, + [[maybe_unused]] char* is_null, + [[maybe_unused]] char* message) +{ + MaxtData *data = (MaxtData*) initid->ptr; + data->updateIfGreater(args); +} + +// This is only for MySQL 4.0 compability +void +maxt_reset(UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* message) +{ + maxt_clear(initid, is_null, message); + maxt_add(initid, args, is_null, message); +} + +// Return the indicator value, if any +longlong +maxt( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ) +{ + MaxtData *data = (MaxtData*) initid->ptr; + + // If nothing happened, return null + if (data->isEmpty()) + { + *is_null = 1; + return 0; + } + +#ifdef LOG_OPERATOR + data->logContent(); +#endif //#ifdef LOG_OPERATOR + + // Return the indicator value + if (data->getIndicator().isNull()) + { + *is_null = 1; + return 0; + } + else + { + *is_null = 0; + return data->getIndicator().getValue(); + } +} + +// Method to convert a sql timestamp YYYY-MM-DD HH:MM:SS to YYYYMMDDHHMMSS +longlong timestampToLongLong(const char* timestamp) +{ + long year,month,day,hour,minute,second; + sscanf(timestamp,"%ld-%ld-%ld %ld:%ld:%ld",&year,&month,&day,&hour,&minute,&second); + return + year * 10000000000L + \ + month * 100000000L + \ + day * 1000000L + \ + hour * 10000L + \ + minute * 100L + \ + second \ + ; +} + +// Methods to parse a sql timestamp YYYYMMDDHHMMSS +longlong getYear(long long sqltimestamp) +{ + return (sqltimestamp / 10000000000L); +} +longlong getMonth(long long sqltimestamp) +{ + return ((sqltimestamp % 10000000000L) / 100000000L); +} +longlong getDay(long long sqltimestamp) +{ + return ((sqltimestamp % 100000000L) / 1000000L); +} +longlong getHour(long long sqltimestamp) +{ + return ((sqltimestamp % 1000000L) / 10000L); +} +longlong getMinute(long long sqltimestamp) +{ + return ((sqltimestamp % 10000L) / 100L); +} +longlong getSecond(long long sqltimestamp) +{ + return (sqltimestamp % 100L); +} +longlong getDate(long long sqltimestamp) +{ + return (sqltimestamp / 1000000L); +} +longlong getTime(long long sqltimestamp) +{ + return (sqltimestamp % 1000000L); +} + +#endif /* HAVE_DLOPEN */ diff --git a/storage/sparrow/udf/udf.def b/storage/sparrow/udf/udf.def new file mode 100644 index 000000000000..931d8f281fc0 --- /dev/null +++ b/storage/sparrow/udf/udf.def @@ -0,0 +1,14 @@ +LIBRARY udf +EXPORTS + value_at_init + value_at_deinit + value_at_reset + value_at_add + value_at_clear + value_at + percentile_init + percentile_deinit + percentile_reset + percentile_add + percentile_clear + percentile diff --git a/storage/sparrow/udf/udf.h b/storage/sparrow/udf/udf.h new file mode 100644 index 000000000000..e48c6f53f510 --- /dev/null +++ b/storage/sparrow/udf/udf.h @@ -0,0 +1,48 @@ +#ifndef _udf_h_ +#define _udf_h_ + +// Client library users on Windows need this macro defined here. +//#include +#include "mysql_com.h" + +// Linkage and calling conventions. 
+#if defined (_WIN32) || defined (__WIN32__) || defined (WIN32) + +#define DLLIMPORT __declspec(dllimport) +#define DLLEXPORT __declspec(dllexport) + +#define DLLCALL __stdcall + +#else // !( _WIN32 || __WIN32__ || WIN32) + +#define DLLIMPORT +#if defined(__GNUC__) && __GNUC__ > 3 +#define DLLEXPORT __attribute__ ((visibility("default"))) +#else +#define DLLEXPORT +#endif + +#define DLLCALL + +#endif // !( _WIN32 || __WIN32__ || WIN32) + + +extern "C" { + + bool value_at_init( UDF_INIT* initid, UDF_ARGS* args, char* message ); + void value_at_deinit( UDF_INIT* initid ); + void value_at_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + void value_at_clear( UDF_INIT* initid, char* is_null, char *error ); + void value_at_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + double value_at( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + + bool percentile_init( UDF_INIT* initid, UDF_ARGS* args, char* message ); + void percentile_deinit( UDF_INIT* initid ); + void percentile_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + void percentile_clear( UDF_INIT* initid, char* is_null, char *error ); + void percentile_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + double percentile( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error ); + +}; + +#endif //#define _udf_h_ diff --git a/storage/sparrow/udf/udfargument.cc b/storage/sparrow/udf/udfargument.cc new file mode 100644 index 000000000000..5dc9e92346ba --- /dev/null +++ b/storage/sparrow/udf/udfargument.cc @@ -0,0 +1,81 @@ +#include "udfargument.h" + +bool operator > ( const UdfArgumentString& a1, const UdfArgumentString& a2 ) +{ + if (a1.null_) + { + return false; + } + + if (a2.null_) + { + return true; + } + + if (a1.value_ == NULL) + { + return false; + } + + if (a2.value_ == NULL) + { + return true; + } + + return strcmp(a1.value_,a2.value_) > 0; +} + +bool operator > ( const UdfArgumentReal& a1, const UdfArgumentReal& a2 ) +{ + if (a1.null_) + { + return false; + } + + if (a2.null_) + { + return true; + } + + return a1.value_ > a2.value_; +} + +bool operator > ( const UdfArgumentInt& a1, const UdfArgumentInt& a2 ) +{ + if (a1.null_) + { + return false; + } + + if (a2.null_) + { + return true; + } + + return a1.value_ > a2.value_; +} + +bool operator > ( const UdfArgumentDecimal& a1, const UdfArgumentDecimal& a2 ) +{ + if (a1.null_) + { + return false; + } + + if (a2.null_) + { + return true; + } + + if (a1.value_ == NULL) + { + return false; + } + + if (a2.value_ == NULL) + { + return true; + } + + return atof(a1.value_) > atof(a2.value_); +} diff --git a/storage/sparrow/udf/udfargument.h b/storage/sparrow/udf/udfargument.h new file mode 100644 index 000000000000..ccd056552ff9 --- /dev/null +++ b/storage/sparrow/udf/udfargument.h @@ -0,0 +1,423 @@ +#ifndef _udfargument_h_ +#define _udfargument_h_ + +#include "my_sys.h" +#include +#include +//#include +//#include "mysql_com.h" +#include "mysql/udf_registration_types.h" + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgument +//////////////////////////////////////////////////////////////////////////////// + +template +class UdfArgument +{ +public: + + // virtual destructor + virtual ~UdfArgument(); + + // return value + const T& getValue() const; + + // return type + virtual Item_result getType() const = 0; + + // return whether the argument is null or not + bool isNull() const; + +protected: + + // constructor + UdfArgument(); + + // 
set value + virtual void setValue(char* arg, unsigned long length) = 0; + + // the maximum reached value of the determiner + T value_; + + // null flag + bool null_; +}; + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgumentString wraps char* +//////////////////////////////////////////////////////////////////////////////// + +class UdfArgumentString : public UdfArgument +{ + +friend bool operator > ( const UdfArgumentString& a1, const UdfArgumentString& a2 ); + +public: + + // constructor + UdfArgumentString(); + UdfArgumentString(char* arg, unsigned long length); + + // virtual destructor + virtual ~UdfArgumentString(); + + // assignment + UdfArgumentString& operator = (const UdfArgumentString& src); + + // set value + virtual void setValue(char* arg, unsigned long length); + + // return type of determiner + virtual Item_result getType() const; +}; + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgumentReal wraps double +//////////////////////////////////////////////////////////////////////////////// + +class UdfArgumentReal : public UdfArgument +{ + +friend bool operator > ( const UdfArgumentReal& a1, const UdfArgumentReal& a2 ); + +public: + + // constructor + UdfArgumentReal(); + UdfArgumentReal(char* arg, unsigned long length); + + // virtual destructor + virtual ~UdfArgumentReal(); + + // assignment + UdfArgumentReal& operator = (const UdfArgumentReal& src); + + // set value + virtual void setValue(char* arg, unsigned long length); + + // return type of determiner + Item_result getType() const; +}; + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgumentInt wraps long long +//////////////////////////////////////////////////////////////////////////////// + +class UdfArgumentInt : public UdfArgument +{ + +friend bool operator > ( const UdfArgumentInt& a1, const UdfArgumentInt& a2 ); + +public: + + // constructor + UdfArgumentInt(); + UdfArgumentInt(char* arg, unsigned long length); + + // virtual destructor + virtual ~UdfArgumentInt(); + + // assignment + UdfArgumentInt& operator = (const UdfArgumentInt& src); + + // set value + virtual void setValue(char* arg, unsigned long length); + + // return type of determiner + Item_result getType() const; +}; + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgumentDecimal wraps char* +//////////////////////////////////////////////////////////////////////////////// + +class UdfArgumentDecimal : public UdfArgumentString +{ + +friend bool operator > ( const UdfArgumentDecimal& a1, const UdfArgumentDecimal& a2 ); + +public: + + // constructor + UdfArgumentDecimal(); + UdfArgumentDecimal(char* arg, unsigned long length); + + // virtual destructor + virtual ~UdfArgumentDecimal(); + + // assignment + UdfArgumentDecimal& operator = (const UdfArgumentDecimal& src); + + // set value + virtual void setValue(char* arg, unsigned long length); + + // return type of determiner + Item_result getType() const; +}; + + + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + + + +//////////////////////////////////////////////////////////////////////////////// +// UdfArgument +//////////////////////////////////////////////////////////////////////////////// + +// constructor +template +inline UdfArgument::UdfArgument() +{ + null_ = true; +} + +// destructor +template +inline 
+
+////////////////////////////////////////////////////////////////////////////////
+// UdfArgument
+////////////////////////////////////////////////////////////////////////////////
+
+// constructor
+template <class T>
+inline UdfArgument<T>::UdfArgument()
+{
+  null_ = true;
+}
+
+// destructor
+template <class T>
+inline UdfArgument<T>::~UdfArgument()
+{
+}
+
+// return value
+template <class T>
+inline const T& UdfArgument<T>::getValue() const
+{
+  return value_;
+}
+
+// return whether the argument is null or not
+template <class T>
+inline bool UdfArgument<T>::isNull() const
+{
+  return null_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// UdfArgumentString wraps char*
+////////////////////////////////////////////////////////////////////////////////
+
+// constructor
+inline UdfArgumentString::UdfArgumentString()
+{
+  value_ = NULL;
+}
+
+inline UdfArgumentString::UdfArgumentString(char* arg, unsigned long length)
+{
+  value_ = NULL;
+  setValue(arg, length);
+}
+
+// destructor
+inline UdfArgumentString::~UdfArgumentString()
+{
+  if (value_ != NULL)
+  {
+    delete [] value_;
+  }
+}
+
+// assignment
+inline UdfArgumentString& UdfArgumentString::operator = (const UdfArgumentString& src)
+{
+  if (&src == this)
+  {
+    return *this;
+  }
+
+  setValue(src.value_, (src.value_==NULL)? 0:(unsigned long)strlen(src.value_));
+
+  return *this;
+}
+
+// set value
+inline void UdfArgumentString::setValue(char* arg, unsigned long length)
+{
+  if (arg == NULL)
+  {
+    null_ = true;
+  }
+  else
+  {
+    if (value_ != NULL)
+    {
+      delete [] value_;
+      value_ = NULL;
+    }
+
+    value_ = new (std::nothrow) char[length+1];
+
+    if (value_ != NULL)
+    {
+      value_[length] = 0x00;
+      if (length > 0)
+      {
+        strncpy(value_,arg,length);
+      }
+    }
+
+    null_ = (value_ == NULL);
+  }
+}
+
+// return type
+inline Item_result UdfArgumentString::getType() const
+{
+  return STRING_RESULT;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// UdfArgumentInt wraps long long
+////////////////////////////////////////////////////////////////////////////////
+
+// constructor
+inline UdfArgumentInt::UdfArgumentInt()
+{
+}
+
+inline UdfArgumentInt::UdfArgumentInt(char* arg, unsigned long length)
+{
+  setValue(arg, length);
+}
+
+// destructor
+inline UdfArgumentInt::~UdfArgumentInt()
+{
+}
+
+// assignment
+inline UdfArgumentInt& UdfArgumentInt::operator = (const UdfArgumentInt& src)
+{
+  if (&src == this)
+  {
+    return *this;
+  }
+
+  value_ = src.value_;
+  null_ = src.null_;
+
+  return *this;
+}
+
+// set value
+inline void UdfArgumentInt::setValue(char* arg, unsigned long length)
+{
+  if (arg == NULL)
+  {
+    null_ = true;
+  }
+  else
+  {
+    value_ = *((long long*) arg);
+    null_ = false;
+  }
+}
+
+// return type
+inline Item_result UdfArgumentInt::getType() const
+{
+  return INT_RESULT;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// UdfArgumentReal wraps double
+////////////////////////////////////////////////////////////////////////////////
+
+// constructor
+inline UdfArgumentReal::UdfArgumentReal()
+{
+}
+
+inline UdfArgumentReal::UdfArgumentReal(char* arg, unsigned long length)
+{
+  setValue(arg, length);
+}
+
+// destructor
+inline UdfArgumentReal::~UdfArgumentReal()
+{
+}
+
+// assignment
+inline UdfArgumentReal& UdfArgumentReal::operator = (const UdfArgumentReal& src)
+{
+  if (&src == this)
+  {
+    return *this;
+  }
+
+  value_ = src.value_;
+  null_ = src.null_;
+
+  return *this;
+}
+
+// set value
+inline void UdfArgumentReal::setValue(char* arg, unsigned long length)
+{
+  if (arg == NULL)
+  {
+    null_ = true;
+  }
+  else
+  {
+    value_ = *((double*) arg);
+    null_ = false;
+  }
+}
+
+// return type
+inline Item_result UdfArgumentReal::getType() const
+{
+  return REAL_RESULT;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// 
UdfArgumentDecimal wraps char* +//////////////////////////////////////////////////////////////////////////////// + +// constructor +inline UdfArgumentDecimal::UdfArgumentDecimal() : UdfArgumentString() +{ +} + +inline UdfArgumentDecimal::UdfArgumentDecimal(char* arg, unsigned long length) : UdfArgumentString(arg, length) +{ +} + +// destructor +inline UdfArgumentDecimal::~UdfArgumentDecimal() +{ +} + +// assignment +inline UdfArgumentDecimal& UdfArgumentDecimal::operator = (const UdfArgumentDecimal& src) +{ + if (&src == this) + { + return *this; + } + + setValue(src.value_, (src.value_==NULL)? 0:(unsigned long)strlen(src.value_)); + + return *this; +} + +// set value +inline void UdfArgumentDecimal::setValue(char* arg, unsigned long length) +{ + UdfArgumentString::setValue(arg, length); +} + +// return type +inline Item_result UdfArgumentDecimal::getType() const +{ + return DECIMAL_RESULT; +} + + + +#endif //#define _udfargument_h_ From 639f3b601aebda5bb5d15d5daf9e8e01563e60ef Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Mon, 20 Jan 2025 17:43:42 +0100 Subject: [PATCH 2/8] AB#340942 Added build scripts --- .gitignore | 4 +- build_scripts/conan/conanfile.mysqlapi.py | 18 + build_scripts/conan/conanfile.sparrowapi.py | 18 + .../conan/lnx_64/conan_download_pckgs.sh | 79 ++++ build_scripts/conan/lnx_64/conanfile.txt | 7 + build_scripts/conan/lnx_64/profile.txt | 15 + build_scripts/linux/build-linux-github.sh | 96 +++++ build_scripts/linux/compile-package-linux.sh | 383 ++++++++++++++++++ build_scripts/misc/misc_functions.sh | 28 ++ .../windows/compile-package-windows.sh | 201 +++++++++ build_scripts/windows/package-windows.sh | 254 ++++++++++++ 11 files changed, 1102 insertions(+), 1 deletion(-) create mode 100644 build_scripts/conan/conanfile.mysqlapi.py create mode 100644 build_scripts/conan/conanfile.sparrowapi.py create mode 100755 build_scripts/conan/lnx_64/conan_download_pckgs.sh create mode 100644 build_scripts/conan/lnx_64/conanfile.txt create mode 100644 build_scripts/conan/lnx_64/profile.txt create mode 100755 build_scripts/linux/build-linux-github.sh create mode 100755 build_scripts/linux/compile-package-linux.sh create mode 100755 build_scripts/misc/misc_functions.sh create mode 100644 build_scripts/windows/compile-package-windows.sh create mode 100644 build_scripts/windows/package-windows.sh diff --git a/.gitignore b/.gitignore index e7d98ef9c056..a4ca6b45dea2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,6 @@ scalability_jobs_* *.bak -_build/ \ No newline at end of file +_build/ +_distrib/ +**generatorfiles* \ No newline at end of file diff --git a/build_scripts/conan/conanfile.mysqlapi.py b/build_scripts/conan/conanfile.mysqlapi.py new file mode 100644 index 000000000000..399844bcd2b4 --- /dev/null +++ b/build_scripts/conan/conanfile.mysqlapi.py @@ -0,0 +1,18 @@ +from conans import ConanFile, tools + + +class MysqlapiConan(ConanFile): + name = "mysqlapi" + version = "5.6.46-42010-SPW-000" + settings = "os", "compiler", "build_type", "arch" + description = "Mysql client API" + url = "None" + license = "None" + author = "None" + topics = None + + def package(self): + self.copy("*") + + def package_info(self): + self.cpp_info.libs = tools.collect_libs(self) diff --git a/build_scripts/conan/conanfile.sparrowapi.py b/build_scripts/conan/conanfile.sparrowapi.py new file mode 100644 index 000000000000..851c1d737c69 --- /dev/null +++ b/build_scripts/conan/conanfile.sparrowapi.py @@ -0,0 +1,18 @@ +from conans import ConanFile, tools + + +class 
SparrowapiConan(ConanFile): + name = "sparrowapi" + version = "5.6.46-42010-SPW-000" + settings = "os", "compiler", "build_type", "arch" + description = "Sparrow client API" + url = "None" + license = "None" + author = "None" + topics = None + + def package(self): + self.copy("*") + + def package_info(self): + self.cpp_info.libs = tools.collect_libs(self) diff --git a/build_scripts/conan/lnx_64/conan_download_pckgs.sh b/build_scripts/conan/lnx_64/conan_download_pckgs.sh new file mode 100755 index 000000000000..a387caeed2cd --- /dev/null +++ b/build_scripts/conan/lnx_64/conan_download_pckgs.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# Arguments + +# SCRIPT_DIR=$( cd -- "$( dirname -- "$0" )" &> /dev/null && pwd ) +# echo `date +"%x %X"` Script directory is $SCRIPT_DIR + +echo `date +"%x %X"` Initial directory `pwd` + +REDHAT_VERSION=`sed -e 's/.*release \([0-9]*\).*/\1/' /etc/redhat-release` +echo `date +"%x %X"` "Running on RedHat version $REDHAT_VERSION" + +GCC_VERSION=`gcc --version | head -n1 | sed -e 's/.*(GCC) \([0-9].[0-9]*\).*/\1/'` +echo `date +"%x %X"` "gcc version is $GCC_VERSION" + +PYTHON3_EXISTS=`which python3 2> /dev/null | wc -l` +if [ $PYTHON3_EXISTS -lt 1 ]; then + echo `date +"%x %X"` "python3 is required for conan, but it does not seem to be installed. Aborting." + exit 1 +fi + +PYTHON_EXISTS=`which python 2> /dev/null | wc -l` +if [ $PYTHON_EXISTS -ge 1 ]; then + PYTHON_VERSION=`python --version 2>&1 | sed -e 's/.*Python \([0-9]\).*/\1/'` + echo `date +"%x %X"` "python version is $PYTHON_VERSION" + + if [ "$PYTHON_VERSION" == "2" ]; then + echo `date +"%x %X"` To use conan, link /usr/bin/python to /usr/bin/python3 + rm -f /usr/bin/python + ln -s /usr/bin/python3 /usr/bin/python + elif [ "$PYTHON_VERSION" != "3" ]; then + echo `date +"%x %X"` Unknown version of python, $PYTHON_VERSION. Aborting + exit 1 + fi +fi + + +# Import third party libs from conan and set env variables to point to the downloaded libraries + +echo `date +"%x %X"` Fetching third party librairies from conan + +export PATH=$PATH:/opt/cmake/bin + +# Be sure the conanfile.txt contains the generator compiler_args +generator_folder=generatorfiles_lnx${REDHAT_VERSION} +rm -Rf $generator_folder; mkdir $generator_folder +conan install -pr profile.txt --build missing conanfile.txt -if $generator_folder -s compiler.version="$GCC_VERSION" + +if [[ $? -ne 0 ]]; then + echo `date +"%x %X"` Error: conan install failed. Aborting. + exit 1 +fi + +echo `date +"%x %X"` Setting environment variables pointing to librairies + +SSLDIR=`grep -m 1 'openssl.*include$' $generator_folder/conanbuildinfo.txt` +SSLDIR=${SSLDIR%/*} +if [ -z "${SSLDIR}" ]; then + echo `date +"%x %X"` Error: opennssl library has not been downloaded. Aborting. + exit 1 +fi +export SSLDIR +echo "Openssl dir $SSLDIR" + +BOOSTDIR=`grep -m 1 'boost.*include$' $generator_folder/conanbuildinfo.txt` +BOOSTDIR=${BOOSTDIR%/*} +if [ -z "${BOOSTDIR}" ]; then + echo `date +"%x %X"` Error: boost library has not been downloaded. Aborting. 
+  exit 1
+fi
+export BOOSTDIR
+echo "Boost dir $BOOSTDIR"
+
+
+if [ "$PYTHON_VERSION" == "2" ]; then
+  echo `date +"%x %X"` Set the link /usr/bin/python back to /usr/bin/python2
+  rm -f /usr/bin/python
+  ln -s /usr/bin/python2 /usr/bin/python
+fi
diff --git a/build_scripts/conan/lnx_64/conanfile.txt b/build_scripts/conan/lnx_64/conanfile.txt
new file mode 100644
index 000000000000..48279f3f908f
--- /dev/null
+++ b/build_scripts/conan/lnx_64/conanfile.txt
@@ -0,0 +1,7 @@
+[requires]
+openssl/1.1.1s
+boost/1.77.0
+
+[generators]
+compiler_args
+
diff --git a/build_scripts/conan/lnx_64/profile.txt b/build_scripts/conan/lnx_64/profile.txt
new file mode 100644
index 000000000000..60b8d1a69169
--- /dev/null
+++ b/build_scripts/conan/lnx_64/profile.txt
@@ -0,0 +1,15 @@
+[settings]
+os=Linux
+os_build=Linux
+arch=x86_64
+arch_build=x86_64
+compiler=gcc
+compiler.libcxx=libstdc++11
+build_type=Release
+
+[options]
+
+[build_requires]
+
+[env]
+
diff --git a/build_scripts/linux/build-linux-github.sh b/build_scripts/linux/build-linux-github.sh
new file mode 100755
index 000000000000..658de8dd297d
--- /dev/null
+++ b/build_scripts/linux/build-linux-github.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Arg 1: Build type: release or debug. Default is release
+# Arg 2: What to do. If set to "extract_only", then the source code is downloaded and extracted, but the build is not launched.
+# If set to "do_not_build", the build process will go one step further: it will download the conan packages and execute cmake, but not compile.
+# If not set or left empty, the build process will execute completely. That's the default.
+
+# Required environment variables
+# REPO_USER: User name to use to clone repo where MySQL / Sparrow source is stored
+# REPO_PSSWD: Password / token to use to clone repo where MySQL / Sparrow source is stored
+# REPO_BRANCH: Branch to check out
+
+# if [[ -z "$REPO_USER" || -z "$REPO_PSSWD" ]] ; then
+#   echo "Env variables REPO_USER and REPO_PSSWD need to be defined and give the credentials to the opensource repository."
+#   exit 1
+# fi
+
+# if [ -z "$REPO_BRANCH" ] ; then
+#   REPO_BRANCH=8.0
+# fi
+# echo `date +"%x %X"` Building branch $REPO_BRANCH
+
+BUILD_MODE=release
+if [ "$1" == "debug" ] ; then
+  BUILD_MODE=debug
+fi
+echo `date +"%x %X"` Build mode is $BUILD_MODE
+
+OPTIONS=$2
+# EXTRACT_ONLY=false
+# if [ "$OPTIONS" == "extract_only" ] ; then
+#   EXTRACT_ONLY=true
+# fi
+# echo "EXTRACT_ONLY is $EXTRACT_ONLY"
+
+if [ -z "$CI_COMMIT_TAG" ]; then
+  echo "Env variable CI_COMMIT_TAG is empty or not defined. It must be set to a valid tag value, such as 4.2.123 or 4.2.123-spw-287."
+  exit 1
+fi
+
+SCRIPT_NAME=$(basename "$0")
+SCRIPT_DIR=$( cd -- "$( dirname -- "$0" )" &> /dev/null && pwd )
+echo `date +"%x %X"` "$SCRIPT_NAME directory: $SCRIPT_DIR"
+
+# Setting execution rights on scripts
+echo `date +"%x %X"` Set file rights on additional build scripts
+chmod +x $SCRIPT_DIR/*.sh $SCRIPT_DIR/../misc/*.sh $SCRIPT_DIR/../conan/lnx_64/*.sh
+
+# Importing helper functions
+# source $SCRIPT_DIR/../misc/misc_functions.sh
+
+# Set the source root folder as working folder.
+cd $SCRIPT_DIR/../..;
+SOURCE_ROOT_FOLDER=`pwd`
+echo `date +"%x %X"` "Source root folder is $SOURCE_ROOT_FOLDER"
+
+# Tag must look like 4.2.123 (without patch), 4.2.123-spw-287-b (with patch)
+# Full version (i.e. 
major.minor.build) +FULL_VERSION=`echo ${CI_COMMIT_TAG} | sed -n 's/\([0-9.]*\).*/\1/p'` +PATCH_VERSION=`echo ${CI_COMMIT_TAG} | sed -n 's/[0-9.]*-\(.*\)/\1/p'` + +echo "FULL_VERSION is $FULL_VERSION" + +SPW_BUILD_VERSION=`echo ${FULL_VERSION} | cut -d '.' -f 1,2` +SPW_BUILD_VERSION_MAJOR=`echo ${FULL_VERSION} | cut -d '.' -f 1` +SPW_BUILD_VERSION_MINOR=`echo ${FULL_VERSION} | cut -d '.' -f 2` +SPW_BUILD_VERSION_BUILD=`echo ${FULL_VERSION} | cut -d '.' -f 3` +SPW_BUILD_VERSION_FULL=${FULL_VERSION} +SPW_BUILD_VERSION_PATCH=${PATCH_VERSION} +echo "SPW_BUILD_VERSION is $SPW_BUILD_VERSION ($SPW_BUILD_VERSION_MAJOR.$SPW_BUILD_VERSION_MINOR.$SPW_BUILD_VERSION_BUILD)" +echo "SPW_BUILD_VERSION_PATCH is $SPW_BUILD_VERSION_PATCH" + +export SPW_BUILD_VERSION SPW_BUILD_VERSION_MAJOR SPW_BUILD_VERSION_MINOR SPW_BUILD_VERSION_BUILD SPW_BUILD_VERSION_FULL SPW_BUILD_VERSION_PATCH + +if [ -z "${SPW_BUILD_VERSION_PATCH}" ]; then + echo `date +"%x %X"` Building sparrow $SPW_BUILD_VERSION_FULL, build mode $BUILD_MODE +else + echo `date +"%x %X"` Building sparrow $SPW_BUILD_VERSION_FULL, patch $SPW_BUILD_VERSION_PATCH, build mode $BUILD_MODE +fi + +# git clone -b $REPO_BRANCH https://${REPO_USER}:${REPO_PSSWD}@github.com/infovista-opensource/mysql-server-timeseries.git . + +# echo `date +"%x %X"` Build and distribution folders +# mkdir -p _build +# mkdir -p _distrib + +# if [ "$EXTRACT_ONLY" = true ]; then +# echo "Code has been extracted." +# exit 0 +# fi + +echo `date +"%x %X"` Executing script to compile source code and make packages, $SOURCE_ROOT_FOLDER/build/mysql/compile-package-linux.sh +# $SCRIPT_DIR/compile-package-linux.sh $SOURCE_ROOT_FOLDER $CI_COMMIT_TAG $BUILD_MODE $OPTIONS +$SCRIPT_DIR/compile-package-linux.sh $CI_COMMIT_TAG $BUILD_MODE $OPTIONS + +echo `date +"%x %X"` Done diff --git a/build_scripts/linux/compile-package-linux.sh b/build_scripts/linux/compile-package-linux.sh new file mode 100755 index 000000000000..63854e5a2f03 --- /dev/null +++ b/build_scripts/linux/compile-package-linux.sh @@ -0,0 +1,383 @@ +#!/bin/bash +#Arg 1 : Sparrow build number. Example: 4.2.123, or 4.2.123-SPW-387 +#Arg 2 : debug or release build. +#Arg 3 : Options. If set to "do_not_build", the script will setup the build env, but won't start the build. +# If set to "do_not_pack", the script will setup the build env, compiles everything but does not generate the packages. + + +# ---------------- Checking argument --------------------- +# and initializing some global variables + +SPARROW_BUILD_NUM=$1 +echo `date +"%x %X"` "Build number $SPARROW_BUILD_NUM" + +BUILD_MODE=$2 +if [ -z "$BUILD_MODE" ] ; then + BUILD_MODE=release +fi +echo `date +"%x %X"` "Build mode $2" + +OPTIONS=$3 +if [ -n "$OPTIONS" ]; then + echo `date +"%x %X"` "Build options $OPTIONS" +fi + +# Used only for dev purposes +DO_NOT_BUILD=false +if [ "$OPTIONS" == "do_not_build" ] ; then + DO_NOT_BUILD=true +fi +echo "DO_NOT_BUILD is $DO_NOT_BUILD" + +# Used only for dev purposes +DO_NOT_PACK=false +if [ "$OPTIONS" == "do_not_pack" ] ; then + DO_NOT_PACK=true +fi +echo "DO_NOT_PACK is $DO_NOT_PACK" + +SCRIPT_NAME=$(basename "$0") +SCRIPT_DIR=$( cd -- "$( dirname -- "$0" )" &> /dev/null && pwd ) +echo `date +"%x %X"` "$SCRIPT_NAME directory: $SCRIPT_DIR" + +cd $SCRIPT_DIR/../..; +SOURCE_ROOT_FOLDER=`pwd` +echo `date +"%x %X"` "Source root folder is $SOURCE_ROOT_FOLDER" + + +# Make a package containing everything: the mysql server files and tools, the libmysqlclient API and the sparrow API. 
+# This generic package will then be used to create the docker images of the dbsrv and poller runtime. + +generate_distrib_pack() { + + echo `date +"%x %X"` "Packaging all binaries and dependencies into a single package $4." + + # First delete any previous zip file left behind in the _distrib folder + rm -f $3/$4 > /dev/null 2>&1 + + pushd $2 + rm -rf _distrib_tmp > /dev/null 2>&1 + + echo `date +"%x %X"` "Gathering all required files for a DB server installation." + distrib_folder=_distrib_tmp/mysql_${BUILD_MODE} + mkdir -p $distrib_folder + cp -r share bin lib $distrib_folder + + echo `date +"%x %X"` "Gathering files for libmysqlclient API." + cp lib/libmysqlclient.so $distrib_folder/lib + + echo `date +"%x %X"` "Gathering files for Sparrow UDF plugin." + mkdir -p $distrib_folder/lib/plugin + cp $1/storage/sparrow/udf/libsparrowudf.so $distrib_folder/lib/plugin + + echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." + cd $distrib_folder + tar -czvf $3/$4.tar.gz * + # zip -r -3 $3/$4 * + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "Tar gzip all files into a package failed." + return $res + fi + + popd +} + +generate_mysqlapi_pack() { + + echo `date +"%x %X"` "Packaging MySQL API library and headers, version $SPARROW_BUILD_NUM, $BUILD_MODE, into a single package $4." + + # First delete any previous zip file left behind in the _distrib folder + rm -f $3/$4 > /dev/null 2>&1 + + pushd $2 + + echo `date +"%x %X"` "Gathering all required files for the MySQL client API." + distrib_folder=_distrib_tmp/mysqlapi_${BUILD_MODE} + rm -rf $distrib_folder > /dev/null 2>&1 + mkdir -p $distrib_folder + cd $distrib_folder + + mkdir lib include + cp -r ../../include/* include + cp ../../lib/libmysqlclient.so ../../lib/libmysqlclient.a lib + + echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." + tar -czvf $3/$4.tar.gz * + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "Tar gzip all files into a package failed." + return $res + fi + + popd +} + + +generate_sparrowapi_pack() { + + echo `date +"%x %X"` "Packaging Sparrow API library and headers, version $SPARROW_BUILD_NUM, $BUILD_MODE, into a single package $4." + + # First delete any previous zip file left behind in the _distrib folder + rm -f $3/$4 > /dev/null 2>&1 + + pushd $2 + + echo `date +"%x %X"` "Gathering all required files for the MySQL client API." + distrib_folder=_distrib_tmp/sparrowapi_${BUILD_MODE} + rm -rf $distrib_folder > /dev/null 2>&1 + mkdir -p $distrib_folder + cd $distrib_folder + + mkdir lib include + cp ../../lib/libsparrowapi.so lib + cp -r $SOURCE_ROOT_FOLDER/storage/sparrow/api/include/* include + + echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." + tar -czvf $3/$4.tar.gz * + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "Tar gzip all files into a package failed." + return $res + fi + + popd +} + + + +# Packages the libmysqlclient API into a conan package and uploads it to the conan repository on jfrog. +generate_mysqlapi_conan_pack() { + pushd $1 + + echo `date +"%x %X"` "Packaging the MySQL libmysqlclient API into a conan package, version $SPARROW_BUILD_NUM, $BUILD_MODE." 
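+  # The export below publishes the recipe under the reference
+  # mysqlapi/<SPARROW_BUILD_NUM>@ativanet-poller/stable. A consumer project
+  # would then pull it the same way conan_download_pckgs.sh pulls openssl and
+  # boost, for example with an illustrative conanfile.txt entry such as:
+  #
+  #   [requires]
+  #   mysqlapi/4.2.123@ativanet-poller/stable
+  #
+  # (4.2.123 is a placeholder; the real version comes from the CI tag.)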
+ rm -Rf _conan/mysqlapi + mkdir -p _conan/mysqlapi + cd _conan/mysqlapi + + cp $SOURCE_ROOT_FOLDER/conan/lnx_64/profile.txt profile.txt + cp $SOURCE_ROOT_FOLDER/conan/conanfile.mysqlapi.py conanfile.py + sed -E "s/version[ \t]*=[ \t]*\".*\"/version = \"$SPARROW_BUILD_NUM\"/" conanfile.py > conanfile.new.py + mv -f conanfile.new.py conanfile.py + rm -f conanfile.new.py + + if [ $BUILD_MODE = "debug" ]; then + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Debug/" profile.txt > profile.new.txt + else + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Release/" profile.txt > profile.new.txt + fi + mv -f profile.new.txt profile.txt + rm -f profile.new.txt + + mkdir lib include + cp -r ../../include/* include + cp ../../lib/libmysqlclient.so ../../lib/libmysqlclient.a lib + + echo `date +"%x %X"` "Exporting the mysqlapi conan package." + conan export-pkg . mysqlapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable -pr profile.txt --force -s compiler.version="$GCC_VERSION" + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "conan export-pkg failed with error code $res." + return $res + fi + + echo `date +"%x %X"` "Uploading the mysqlapi conan package to JFrog." + conan upload -r jfrog mysqlapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable --all --no-overwrite recipe + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "conan upload failed with error code $res." + return $res + fi + + popd +} + +# Packages the Sparrow API into a conan package and uploads it to the conan repository on jfrog. + +generate_sparrowapi_conan_pack() { + pushd $1 + + echo `date +"%x %X"` "Packaging the Sparrow API into a conan package, version $SPARROW_BUILD_NUM, $BUILD_MODE." + rm -Rf _conan/sparrowapi + mkdir -p _conan/sparrowapi + cd _conan/sparrowapi + + cp $SOURCE_ROOT_FOLDER/conan/lnx_64/profile.txt profile.txt + cp $SOURCE_ROOT_FOLDER/conan/conanfile.sparrowapi.py conanfile.py + sed -E "s/version[ \t]*=[ \t]*\".*\"/version = \"$SPARROW_BUILD_NUM\"/" conanfile.py > conanfile.new.py + mv -f conanfile.new.py conanfile.py + rm -f conanfile.new.py + + if [ $BUILD_MODE = "debug" ]; then + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Debug/" profile.txt > profile.new.txt + else + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Release/" profile.txt > profile.new.txt + fi + mv -f profile.new.txt profile.txt + rm -f profile.new.txt + + mkdir lib include + cp ../../lib/libsparrowapi.so lib + cp -r $SOURCE_ROOT_FOLDER/storage/sparrow/api/include/* include + + echo `date +"%x %X"` "Exporting the sparrowapi conan package." + conan export-pkg . sparrowapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable -pr profile.txt --force -s compiler.version="$GCC_VERSION" + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "conan export-pkg failed with error code $res." + return $res + fi + + echo `date +"%x %X"` "Uploading the sparrowapi conan package to JFrog." + conan upload -r jfrog sparrowapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable --all --no-overwrite recipe + res=$? + if [ $? -ne 0 ]; then + echo `date +"%x %X"` "conan upload failed with error code $res." 
+ return $res + fi + + popd +} + +# ---------------- Script actually starts here --------------------- + +# env | sort; + +REDHAT_VERSION=`sed -e 's/.*release \([0-9]*\).*/\1/' /etc/redhat-release` +echo `date +"%x %X"` "Running on RedHat version $REDHAT_VERSION" + +GCC_VERSION=`gcc --version | head -n1 | sed -e 's/.*(GCC) \([0-9].[0-9]*\).*/\1/'` +echo `date +"%x %X"` "gcc version is $GCC_VERSION" + + +# Execute the conan script to get the openssl third party lib +echo `date +"%x %X"` "Executing conan script" +cd $SOURCE_ROOT_FOLDER/build_scripts/conan/lnx_64 + +. ./conan_download_pckgs.sh + +if [ -z "$SSLDIR" ]; then + echo `date +"%x %X"` Missing SSLDIR + exit 1 +fi + +if [ -z "$BOOSTDIR" ]; then + # Checks the MySQL source code includes the boost library it requires. + # If so, set the BOOSTDIR accordingly + cd $SOURCE_ROOT_FOLDER + if [ ! -d boost ]; then + echo `date +"%x %X"` Missing boost library. + exit 1 + fi + + cd boost + BOOSTDIR_VER=`ls` + BOOSTDIR=$SOURCE_ROOT_FOLDER/boost/$BOOSTDIR_VER +else + BOOSTDIR=$BOOSTDIR/include +fi +echo `date +"%x %X"` Boost dir is $BOOSTDIR + +# Prepare the build folder which will contain the CMake resulting files and the compilation files +# and prepare the distrib build folder which will the subset of files we package and distribute. +export LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:$LD_LIBRARY_PATH + +build_dir=$SOURCE_ROOT_FOLDER/_build/lnx${REDHAT_VERSION}_64 +mkdir -p $build_dir + +distrib_dir=$SOURCE_ROOT_FOLDER/_distrib/lnx${REDHAT_VERSION}_64 +mkdir -p $distrib_dir + +cd $build_dir + +echo `date +"%x %X"` "Creating $BUILD_MODE sub-dir in _build. Removing previous $BUILD_MODE sub-dir if it existed." +rm -Rf $BUILD_MODE +mkdir $BUILD_MODE +cd $BUILD_MODE +build_dir_arch=$build_dir/$BUILD_MODE + +# Build source code. Try to build and embed only the required modules. So remove from build all module that are not needed. +echo `date +"%x %X"` "Starting $BUILD_MODE build" +CMAKE_OPTIONS="-DWITH_UNIT_TESTS=0 -DWITHOUT_GROUP_REPLICATION=1 -DWITHOUT_HEAP_STORAGE_ENGINE=1 -DWITHOUT_CSV_STORAGE_ENGINE=1 -DWITHOUT_ARCHIVE_STORAGE_ENGINE=1 -DWITHOUT_BLACKHOLE_STORAGE_ENGINE=1 -DWITHOUT_EXAMPLE_STORAGE_ENGINE=1 -DWITHOUT_FEDERATED_STORAGE_ENGINE=1 -DBUILD_CONFIG=mysql_${BUILD_MODE} -DWITH_SSL=$SSLDIR -DWITH_BOOST=$BOOSTDIR" +echo "CMAKE_OPTIONS is " $CMAKE_OPTIONS + +if [ $BUILD_MODE = "debug" ]; then + cmake ../../.. $CMAKE_OPTIONS -DCOMPILATION_COMMENT="build: $SPARROW_BUILD_NUM" -DCMAKE_BUILD_MODE=Debug +else + cmake ../../.. $CMAKE_OPTIONS -DCOMPILATION_COMMENT="build: $SPARROW_BUILD_NUM" -DCMAKE_BUILD_MODE=RelWithDebInfo +fi +res=$? +if [ $res -ne 0 ]; then + echo `date +"%x %X"` "Cmake for $BUILD_MODE build failed." + exit $res +fi + +if [ "$DO_NOT_BUILD" = true ]; then + echo `date +"%x %X"` "Build setup and CMake are done." + exit 0 +fi + +# Compile everything +echo `date +"%x %X"` "Compiling source code..." +make package +echo `date +"%x %X"` "Compiling source code finished." + +export PACKAGE_DIR=`ls -l $build_dir_arch/_CPack_Packages/Linux/TGZ | grep mysql- | head -n1 | awk '{print $NF}'` + +export MYSQL_TAG=`echo $PACKAGE_DIR | sed -e 's/mysql-\([0-9.]*\)-.*/\1/'` +echo `date +"%x %X"` "MySQL tag is $MYSQL_TAG" + +if [ "$DO_NOT_PACK" = true ]; then + echo `date +"%x %X"` "Source compilation is done. Packaging is skipped." + exit 0 +fi + +echo `date +"%x %X"` "Zipping distribution package containing binaries and configuration files to be deployed." 
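+# The three generate_* calls below share the same positional convention:
+#   $1 = build output tree ($build_dir_arch)
+#   $2 = CPack staging directory whose share/bin/lib/include trees are copied
+#   $3 = destination _distrib directory
+#   $4 = package base name (the functions append ".tar.gz")
+# For example, a release build tagged 4.2.123 would produce
+# sparrow-distrib-4.2.123-x64-release.tar.gz, mysqlapi-4.2.123-x64-release.tar.gz
+# and sparrowapi-4.2.123-x64-release.tar.gz in $distrib_dir (4.2.123 being an
+# illustrative tag value).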
+generate_distrib_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir sparrow-distrib-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} +if [ $? -ne 0 ]; then + echo Failed to generate distribution package. + exit 1 +fi + +generate_mysqlapi_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir mysqlapi-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} +if [ $? -ne 0 ]; then + echo Failed to generate MySQL API package. + exit 1 +fi + +generate_sparrowapi_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir sparrowapi-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} +if [ $? -ne 0 ]; then + echo Failed to generate Sparrow API package. + exit 1 +fi + +# This needs to be changed to point to sourceforge or something +# echo `date +"%x %X"` "Uploading distribution package $distrib_dir/sparrow-$SPARROW_BUILD_NUM.zip to JFrog generic package repository." +# # curl --header "Authorization: Bearer $CI_JOB_TOKEN" --upload-file $distrib_dir/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip $CI_API_V4_URL/projects/$CI_PROJECT_ID/packages/generic/sparrow/$SPARROW_BUILD_NUM/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip?select=package_file +# curl -u ${RELEASE_GENERIC_USER}:${RELEASE_GENERIC_PASSWORD} --upload-file $distrib_dir/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip ${RELEASE_GENERIC_REPO}/sparrow/$SPARROW_BUILD_NUM/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip +# if [ $? -ne 0 ]; then +# echo Failed to upload distribution package. +# exit 1 +# fi + +# It will probably not work to upload packages to our private conan repo from infovista-opensource. Packahes will have to be uploaded to sourceforge or something and then +# downloaded from there in the Net Poller's docker pre-build scripts. +# echo `date +"%x %X"` "Packaging the mysql api for conan" +# generate_mysqlapi_conan_pack $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 +# if [ $? -ne 0 ]; then +# echo Failed to make or upload conan package for the mysql api. +# exit 1 +# fi + +# echo `date +"%x %X"` "Packaging the sparrow api for conan" +# generate_sparrowapi_conan_pack $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 +# if [ $? -ne 0 ]; then +# echo Failed to make or upload conan package for the sparrow api. +# exit 1 +# fi + +# echo `date +"%x %X"` Cleaning up +# rm -rf _CPack_Packages +# make clean + +echo `date +"%x %X"` $BUILD_MODE build finished diff --git a/build_scripts/misc/misc_functions.sh b/build_scripts/misc/misc_functions.sh new file mode 100755 index 000000000000..3ad8a5a3f4b5 --- /dev/null +++ b/build_scripts/misc/misc_functions.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +download_and_untar() +{ + FOLDER_SRC=$1 + PACKAGE=$2 + FOLDER_DST=$3 + echo "Downloading ${RELEASE_GENERIC_REPO}/${FOLDER_SRC}/${PACKAGE} and decompressing in ${FOLDER_DST}" + curl --user ${RELEASE_GENERIC_USER}:${RELEASE_GENERIC_PASSWORD} ${RELEASE_GENERIC_REPO}/${FOLDER_SRC}/${PACKAGE} -Lo ${PACKAGE} + if [[ $? -ne 0 ]]; then + echo Error: ${PACKAGE} was not downloaded. Aborting. 
+ rm -f ${PACKAGE} + exit 1 + fi + # Just for debugging purposes + echo "curl result is " `ls -l ${PACKAGE}` + # if [ -f ${PACKAGE} ] ; then + # echo "Content of downloaded file is: " `head -n5 ${PACKAGE}` + # fi + + if [ -z "$FOLDER_DST" ]; then + echo "Decompressing ${PACKAGE}" + tar -xf ${PACKAGE} + else + echo "Decompressing ${PACKAGE} into ${FOLDER_DST}" + tar -xf ${PACKAGE} -C ${FOLDER_DST} + fi +} diff --git a/build_scripts/windows/compile-package-windows.sh b/build_scripts/windows/compile-package-windows.sh new file mode 100644 index 000000000000..168b5a37c57e --- /dev/null +++ b/build_scripts/windows/compile-package-windows.sh @@ -0,0 +1,201 @@ +#!/bin/bash +#Arg 1 : Build directory. Example: C:\temp\builds\mysql-4.2\5.6.46-42010-SPW-370. +#Arg 2 : mysql tag (version). Example: 5.6.46 +#Arg 3 : Sparrow build number. Example: 42010-SPW-370 +#Arg 4 : win64 or win32. + +echo "Source directory $1" +echo "Mysql version $2" +echo "Build number $3" +echo "Build type $4" + +generate() { + # First delete any previous zip file left behind + rm -f $2 $3 $4 + + pushd $1 + rm -rf share/Makefile* share/*.sql share/*.txt + zip -r -9 $2 bin share + popd + pushd storage/sparrow/udf/$5 + rm -rf lib + mkdir lib + mkdir lib/plugin + cp sparrowudf.dll lib/plugin/ + cp sparrowudf.lib lib/plugin/ + cp sparrowudf.pdb lib/plugin/ + zip -r -9 $2 lib + rm -rf lib + popd + + pushd ../../storage/sparrow/api + zip -r -9 $3 include + popd + pushd storage/sparrow/$5 + zip -r -9 $3 sparrowapi.dll sparrowapi.lib sparrowapi.pdb + popd + + pushd $1 + zip -r -9 $4 include + popd + pushd libmysql/$5 + zip -r -9 $4 libmysql.dll libmysql.lib libmysql.pdb + popd +} + +build_dir=$1/_build +cd $build_dir + + +if [ $4 = "win32" ]; then + + # Create win32 build dir, where cmake will output all its files and where the build is going to take place. Example: C:\temp\builds\mysql-4.2\5.6.46-42010-SPW-370\_build\win32 + echo `date` "Creating win32 sub-dir of _build. Removing previous win32 sub-dir if it existed." + rm -rf win32 + mkdir win32 + cd win32 + + echo `date` "Starting win32 cmake." + cmake ../.. -DCOMPILATION_COMMENT="build: $3" -G "Visual Studio 16 2019" -A x86 -DWITH_EMBEDDED_SERVER=0 -DWITHOUT_BLACKHOLE_STORAGE_ENGINE=1 -DWITHOUT_EXAMPLE_STORAGE_ENGINE=1 -DWITHOUT_FEDERATED_STORAGE_ENGINE=1 -DWITH_SSL=T:\\core\\22.2\\openssl\\1.1.1s\\win32 + res=$? + if [ $res -ne 0 ]; then + echo `date` "Win32 cmake of source code failed." + exit $res + fi + + echo `date` "Starting win32 debug build" + devenv mysql.sln /build Debug /project ALL_BUILD /out build_win32_debug.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Debug build of source code failed." + exit $res + fi + + echo `date` "Generating debug package" + devenv mysql.sln /build Debug /project package /out build_win32_debug.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Packaging failed." + exit $res + fi + + echo `date` "Packaging done. Zipping debug distribution packages" + mkdir -p $1/_distrib/win32 + distrib_dir=$1/_distrib/win32 + generate _CPack_Packages/win32/ZIP/mysql-$2-winx32 $distrib_dir/mysql_debug.zip $distrib_dir/sparrowapi_debug.zip $distrib_dir/mysqlapi_debug.zip Debug 2>&1 + + + echo `date` "Starting win32 release build" + devenv mysql.sln /build RelWithDebInfo /project ALL_BUILD /out build_win32_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Release build of source code failed." 
+ exit $res + fi + + echo `date` "Generating release package" + devenv mysql.sln /build RelWithDebInfo /project package /out build_win32_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Packaging failed." + exit $res + fi + + echo `date` "Packaging done. Zipping debug distribution packages" + generate _CPack_Packages/win32/ZIP/mysql-$2-winx32 $distrib_dir/mysql_release.zip $distrib_dir/sparrowapi_release.zip $distrib_dir/mysqlapi_release.zip RelWithDebInfo 2>&1 + + + echo `date` "Generating initial database" + devenv mysql.sln /build RelWithDebInfo /project initial_database /out build_win32_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Initial database build failed." + exit $res + fi + + echo `date` "Cleaning up debug build" + rm -rf _CPack_Packages + devenv mysql.sln /clean Debug /out build_win32_debug.log + + echo `date` "Cleaning up release build" + rm -rf _CPack_Packages + devenv mysql.sln /clean RelWithDebInfo /out build_win32_release.log + + echo `date` "Win32 build finished" + +else + + # Create win64 build dir, where cmake will output all its files and where the build is going to take place. Example: R:\Sparrow\4.0\5.5.27-40061\_build\win64 + echo `date` "Creating win64 sub-dir of _build. Removing previous win64 sub-dir if it existed." + rm -rf win64 + mkdir win64 + cd win64 + + echo `date` "Starting win64 cmake." + cmake ../.. -DCOMPILATION_COMMENT="build: $3" -G "Visual Studio 16 2019" -A x64 -DWITH_EMBEDDED_SERVER=0 -DWITHOUT_BLACKHOLE_STORAGE_ENGINE=1 -DWITHOUT_EXAMPLE_STORAGE_ENGINE=1 -DWITHOUT_FEDERATED_STORAGE_ENGINE=1 -DWITH_SSL=T:\\core\\22.2\\openssl\\1.1.1s\\win64 + res=$? + if [ $res -ne 0 ]; then + echo `date` "Win64 cmake of source code failed." + exit $res + fi + + echo `date` "Starting win64 debug build" + devenv mysql.sln /build Debug /project ALL_BUILD /out build_win64_debug.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Debug build of source code failed." + exit $res + fi + + echo `date` "Generating debug package" + devenv mysql.sln /build Debug /project package /out build_win64_debug.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Packaging failed." + exit $res + fi + + echo `date` "Packaging done. Zipping debug distribution packages" + mkdir -p $1/_distrib/win64 + distrib_dir=$1/_distrib/win64 + generate _CPack_Packages/win64/ZIP/mysql-$2-winx64 $distrib_dir/mysql_debug.zip $distrib_dir/sparrowapi_debug.zip $distrib_dir/mysqlapi_debug.zip Debug 2>&1 + + + echo `date` "Starting win64 release build" + devenv mysql.sln /build RelWithDebInfo /project ALL_BUILD /out build_win64_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Release build of source code failed." + exit $res + fi + + echo `date` "Generating release package" + devenv mysql.sln /build RelWithDebInfo /project package /out build_win64_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Packaging failed." + exit $res + fi + + echo `date` "Packaging done. Zipping debug distribution packages" + generate _CPack_Packages/win64/ZIP/mysql-$2-winx64 $distrib_dir/mysql_release.zip $distrib_dir/sparrowapi_release.zip $distrib_dir/mysqlapi_release.zip RelWithDebInfo 2>&1 + + + echo `date` "Generating initial database" + devenv mysql.sln /build RelWithDebInfo /project initial_database /out build_win64_release.log + res=$? + if [ $res -ne 0 ]; then + echo `date` "Initial database build failed." 
+ exit $res + fi + + #echo `date` "Cleaning up debug build" + #rm -rf _CPack_Packages + #devenv mysql.sln /clean Debug /out build_win64_debug.log + + #echo `date` "Cleaning up release build" + #rm -rf _CPack_Packages + #devenv mysql.sln /clean RelWithDebInfo /out build_win64_release.log + + echo `date` "Win64 build finished" +fi diff --git a/build_scripts/windows/package-windows.sh b/build_scripts/windows/package-windows.sh new file mode 100644 index 000000000000..e29e15d51215 --- /dev/null +++ b/build_scripts/windows/package-windows.sh @@ -0,0 +1,254 @@ +#!/bin/bash +#Arg 1 : Root directory where the source code is built. +#Arg 2 : mysql tag (version). Example: 5.6.46 +#Arg 3 : Sparrow build number. Example: 4.2.123, or 4.2.123-SPW-387 +#Arg 4 : debug or release build. +#Arg 5 : Options. If set to "do_not_build", the script will setup the build env, but won't start the build. + +echo `date +"%x %X"` "Source directory $1" +echo `date +"%x %X"` "Mysql version $2" +echo `date +"%x %X"` "Build number $3" +echo `date +"%x %X"` "Build mode $4" + +SRC_DIR=$1 +MYSQL_TAG=$2 +SPARROW_BUILD_NUM=$3 +BUILD_MODE=$4 +OPTIONS=$5 + +SCRIPT_NAME=$(basename "$0") +SCRIPT_DIR=$( cd -- "$( dirname -- "$0" )" &> /dev/null && pwd ) +echo "$SCRIPT_NAME directory: $SCRIPT_DIR" + +BUILD_SCRIPTS_FOLDER=$SCRIPT_DIR/../.. + +if [ -z "$SRC_DIR" ]; then + SRC_DIR="$SCRIPT_DIR/../../../mysql_src" + echo `date +"%x %X"` "Missing source directory. Using $SRC_DIR as default." +fi + +if [ -z "$MYSQL_TAG" ]; then + MYSQL_TAG="5.7.26" + echo `date +"%x %X"` "Missing mysql tag. No default. Aborting." + exit 1 +fi + +if [ -z "$SPARROW_BUILD_NUM" ]; then + echo `date +"%x %X"` "Missing Sparrow build number. No default. Aborting." + exit 1 +fi + +if [ -z "$BUILD_MODE" ]; then + BUILD_MODE="release" + echo `date +"%x %X"` "Missing build mode. Using $BUILD_MODE as default." +fi + +if [ -n "$OPTIONS" ]; then + echo `date +"%x %X"` "Build options $OPTIONS" +fi + +DO_NOT_BUILD=false +if [ "$OPTIONS" == "do_not_build" ] ; then + DO_NOT_BUILD=true +fi +echo "DO_NOT_BUILD is $DO_NOT_BUILD" + +generate() { + # First delete any previous zip file left behind + rm -f $2 $3 $4 > /dev/null 2>&1 + + pushd $1 + rm -rf share/Makefile* share/*.sql share/*.txt + zip -r -9 $2 bin share + popd + pushd storage/sparrow/udf + rm -rf lib + mkdir lib + mkdir lib/plugin + cp libsparrowudf.so lib/plugin/ + zip -r -9 $2 lib + rm -rf lib + popd + + pushd ../../storage/sparrow/api + zip -r -9 $3 include + popd + pushd storage/sparrow + zip -r -9 $3 libsparrowapi.so + popd + + pushd $1 + zip -r -9 $4 include + popd + pushd $1/lib + zip -r -9 $4 libmysqlclient*.* + popd +} + +generate_distrib_pack() { + # First delete any previous zip file left behind in the _distrib folder + rm -f $3/$4 > /dev/null 2>&1 + + pushd $2 + rm -r _distrib_tmp + + bin_folder=_distrib_tmp/mysql_${BUILD_MODE} + mkdir -p $bin_folder + + cp -r share bin $bin_folder + + mkdir $bin_folder/lib + cp lib/libmysqlclient.a lib/libmysqlclient.so $bin_folder/lib + + mkdir -p $bin_folder/lib/plugin + cp $1/storage/sparrow/udf/libsparrowudf.so $bin_folder/lib/plugin + +# pushd $1 + data_folder=_distrib_tmp/mysql_data + mkdir -p $data_folder + cp -r data/mysql data/performance_schema $data_folder + + sparrow_folder=_distrib_tmp/sparrowapi_${BUILD_MODE} + mkdir -p $sparrow_folder +# popd + cp lib/libsparrowapi.so $sparrow_folder + # cp -r ../../storage/sparrow/api/include $1/$sparrow_folder +# pushd $1 + + cd _distrib_tmp + zip -r -3 $3/$4 * + cd .. 
+ + popd +} + + +generate_mysqlapi_conan_pack() { + pushd $1 + + rm -Rf _conan/mysqlapi + mkdir -p _conan/mysqlapi + cd _conan/mysqlapi + + cp $BUILD_SCRIPTS_FOLDER/conan/lnx_64/profile.txt profile.txt + cp $BUILD_SCRIPTS_FOLDER/conan/conanfile.mysqlapi.py conanfile.py + sed -E "s/version[ \t]*=[ \t]*\".*\"/version = \"$SPARROW_BUILD_NUM\"/" conanfile.py > conanfile.new.py + mv -f conanfile.new.py conanfile.py + rm -f conanfile.new.py + + if [ $BUILD_MODE = "debug" ]; then + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Debug/" profile.txt > profile.new.txt + else + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Release/" profile.txt > profile.new.txt + fi + mv -f profile.new.txt profile.txt + rm -f profile.new.txt + + mkdir lib include + cp -r ../../include/* include + cp ../../lib/libmysqlclient.so ../../lib/libmysqlclient.a lib + + echo `date +"%x %X"` "Exporting the mysqlapi conan package." + conan export-pkg . mysqlapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable -pr profile.txt --force + + echo `date +"%x %X"` "Uploading the mysqlapi conan package to JFrog." + conan upload -r jfrog mysqlapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable --all --no-overwrite recipe + + popd +} + + +generate_sparrowapi_conan_pack() { + pushd $1 + + rm -Rf _conan/sparrowapi + mkdir -p _conan/sparrowapi + cd _conan/sparrowapi + + cp $BUILD_SCRIPTS_FOLDER/conan/lnx_64/profile.txt profile.txt + cp $BUILD_SCRIPTS_FOLDER/conan/conanfile.sparrowapi.py conanfile.py + sed -E "s/version[ \t]*=[ \t]*\".*\"/version = \"$SPARROW_BUILD_NUM\"/" conanfile.py > conanfile.new.py + mv -f conanfile.new.py conanfile.py + rm -f conanfile.new.py + + if [ $BUILD_MODE = "debug" ]; then + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Debug/" profile.txt > profile.new.txt + else + sed -E "s/BUILD_MODE[ \t]*=[ \t]*.*/BUILD_MODE=Release/" profile.txt > profile.new.txt + fi + mv -f profile.new.txt profile.txt + rm -f profile.new.txt + + mkdir lib include + cp ../../lib/libsparrowapi.so lib + cp -r $SRC_DIR/storage/sparrow/api/include/* include + + echo `date +"%x %X"` "Exporting the sparrowapi conan package." + conan export-pkg . sparrowapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable -pr profile.txt --force + + echo `date +"%x %X"` "Uploading the sparrowapi conan package to JFrog." + conan upload -r jfrog sparrowapi/${SPARROW_BUILD_NUM}@ativanet-poller/stable --all --no-overwrite recipe + + popd +} + +# ---------------- STARTING HERE --------------------- +# env | sort; + +build_dir=$SRC_DIR/_build/win64 +distrib_dir=$SRC_DIR/_distrib/win64 +build_dir_arch=$build_dir/$BUILD_MODE + + + +# This has been deprecated and removed in MySQL 8.0. See https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-0.html#:~:text=The%20deprecated%20mysql_install_db +# Generate initial database. +# echo `date +"%x %X"` "Generating initial database" +# pushd _CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 +# scripts/mysql_install_db --datadir=./data +# popd +# pushd _CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64/data +# rm -rf test + +# echo `date +"%x %X"` "Zipping initial database files from `pwd` into $distrib_dir/mysql_data.zip" +# rm -f $distrib_dir/mysql_data.zip +# zip -r -9 $distrib_dir/mysql_data.zip * +# popd + +# echo `date +"%x %X"` "Zipping distribution packages, format for ivserver 6.1." 
+# generate _CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir/mysql_${BUILD_MODE}.zip $distrib_dir/sparrowapi_${BUILD_MODE}.zip $distrib_dir/mysqlapi_${BUILD_MODE}.zip + +echo `date +"%x %X"` "Zipping distribution package containing binaries and configuration files to be deployed." +generate_distrib_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip +if [ $? -ne 0 ]; then + echo Failed to generate distribution package. + exit 1 +fi + +echo `date +"%x %X"` "Uploading distribution package $distrib_dir/sparrow-$SPARROW_BUILD_NUM.zip to JFrog generic package repository." +# curl --header "Authorization: Bearer $CI_JOB_TOKEN" --upload-file $distrib_dir/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip $CI_API_V4_URL/projects/$CI_PROJECT_ID/packages/generic/sparrow/$SPARROW_BUILD_NUM/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip?select=package_file +curl -u ${RELEASE_GENERIC_USER}:${RELEASE_GENERIC_PASSWORD} --upload-file $distrib_dir/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip ${RELEASE_GENERIC_REPO}/sparrow/$SPARROW_BUILD_NUM/sparrow-$SPARROW_BUILD_NUM-x64-${BUILD_MODE}.zip +if [ $? -ne 0 ]; then + echo Failed to upload distribution package. + exit 1 +fi + +echo `date +"%x %X"` "Packaging the mysql api for conan" +generate_mysqlapi_conan_pack $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 +if [ $? -ne 0 ]; then + echo Failed to make or upload conan package for the mysql api. + exit 1 +fi + +echo `date +"%x %X"` "Packaging the sparrow api for conan" +generate_sparrowapi_conan_pack $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 +if [ $? -ne 0 ]; then + echo Failed to make or upload conan package for the sparrow api. 
+ exit 1 +fi + +# echo `date +"%x %X"` Cleaning up +# rm -rf _CPack_Packages +# make clean + +echo `date +"%x %X"` $BUILD_MODE build finished From dcc572be4256277bb51d450e55ce6630cac3fb68 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Tue, 21 Jan 2025 10:56:12 +0100 Subject: [PATCH 3/8] AB#340942 Added a GitHub workflow --- .github/workflows/ci.yml | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000000..1dce36aefe69 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,53 @@ +name: sparrow-ci-workflow +on: + # Triggers the pipeline on push, on PRs to "main", manually + push: + tags: + - '[0-9]+.[0-9]+.[0-9]+**' + - '![0-9]+.[0-9]+.[0-9]+-DBG**' + workflow_dispatch: + +env: + DEBUG: ${{ contains(github.ref_name, 'DBG') }} + MODE: ${{ contains(github.ref_name, 'DBG') && 'debug' || 'release' }} + +jobs: + build-job-lnx8-x64: + needs: [] + runs-on: ubuntu-latest + container: + image: ghcr.io/infovista-opensource/mysql-server-timeseries-builder-images/mysql8-build-lnx8:0.48 + # volumes: + # - /data/conan:/data/conan + + steps: + - name: Cache Conan packages + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-lnx8-x64 + path: /data/conan + + - uses: actions/checkout@v4 + + - name: Building binaries + shell: bash + run: | + echo "Working folder `pwd`" + env | sort + chmod a+x build_scripts\linux\build-linux-github.sh + build_scripts\linux\build-linux-github.sh ${{ env.MODE }} + + - name: Uploading binaries + uses: actions/upload-artifact@v4 + with: + name: lnx8-x64-binaries + path: | + _distrib/lnx8_64/*.tar.gz + **/*.log + + - name: Uploading release assets + uses: alexellis/upload-assets@0.4.1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + asset_paths: '["_distrib/lnx8_64/*.tar.gz"]' \ No newline at end of file From 15d9c1b81dc5269adb4c8becc054f6764ff99911 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Tue, 21 Jan 2025 11:05:43 +0100 Subject: [PATCH 4/8] AB#340942 Fixing script path and adding some troubleshooting commands --- .github/workflows/ci.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1dce36aefe69..ebfae342ebe9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,9 +33,13 @@ jobs: shell: bash run: | echo "Working folder `pwd`" + echo "Build mode is ${{ env.MODE }}" env | sort - chmod a+x build_scripts\linux\build-linux-github.sh - build_scripts\linux\build-linux-github.sh ${{ env.MODE }} + ls -l + echo "Conan package folder contains: `ls -l /data/conan`" + conan search + chmod a+x build_scripts/linux/build-linux-github.sh + build_scripts/linux/build-linux-github.sh ${{ env.MODE }} - name: Uploading binaries uses: actions/upload-artifact@v4 From 741c2ec44b0cef83aa051805655ca7c06dd734f6 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Tue, 21 Jan 2025 11:27:26 +0100 Subject: [PATCH 5/8] AB#340942 Added CI_COMMIT_TAG --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebfae342ebe9..b8d634355942 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,8 @@ jobs: - name: Building binaries shell: bash + env: + CI_COMMIT_TAG: ${{ github.ref }} run: | echo "Working folder `pwd`" echo "Build mode is ${{ env.MODE }}" From 12d5e56798d5035a7d3a2e9db2c62275f1bb35c7 Mon 
Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Tue, 21 Jan 2025 13:23:53 +0100 Subject: [PATCH 6/8] AB#340942 Fixed value of CI_COMMIT_TAG --- .github/workflows/ci.yml | 2 +- build_scripts/linux/compile-package-linux.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b8d634355942..5590f6ef678d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - name: Building binaries shell: bash env: - CI_COMMIT_TAG: ${{ github.ref }} + CI_COMMIT_TAG: ${{ github.ref_name }} run: | echo "Working folder `pwd`" echo "Build mode is ${{ env.MODE }}" diff --git a/build_scripts/linux/compile-package-linux.sh b/build_scripts/linux/compile-package-linux.sh index 63854e5a2f03..5417ea63ccd6 100755 --- a/build_scripts/linux/compile-package-linux.sh +++ b/build_scripts/linux/compile-package-linux.sh @@ -332,19 +332,21 @@ if [ "$DO_NOT_PACK" = true ]; then exit 0 fi -echo `date +"%x %X"` "Zipping distribution package containing binaries and configuration files to be deployed." +echo `date +"%x %X"` "Generating the distribution package which contains the binaries and configuration files to be deployed." generate_distrib_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir sparrow-distrib-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} if [ $? -ne 0 ]; then echo Failed to generate distribution package. exit 1 fi +echo `date +"%x %X"` "Generating the MySQL API package which includes the header files and the library." generate_mysqlapi_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir mysqlapi-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} if [ $? -ne 0 ]; then echo Failed to generate MySQL API package. exit 1 fi +echo `date +"%x %X"` "Generating the Sparrow API package which includes the header files and the library." generate_sparrowapi_pack $build_dir_arch $build_dir_arch/_CPack_Packages/Linux/TGZ/mysql-$MYSQL_TAG-linux-x86_64 $distrib_dir sparrowapi-$SPARROW_BUILD_NUM-x64-${BUILD_MODE} if [ $? -ne 0 ]; then echo Failed to generate Sparrow API package. From b294a9ae879e02258d68daf4cc38d819dcc4a5f3 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Fri, 24 Jan 2025 13:57:53 +0100 Subject: [PATCH 7/8] AB#340942 Copy symbolic links to libmysqlclient.so in distribution packages --- build_scripts/linux/compile-package-linux.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/build_scripts/linux/compile-package-linux.sh b/build_scripts/linux/compile-package-linux.sh index 5417ea63ccd6..97730d532f23 100755 --- a/build_scripts/linux/compile-package-linux.sh +++ b/build_scripts/linux/compile-package-linux.sh @@ -61,10 +61,10 @@ generate_distrib_pack() { echo `date +"%x %X"` "Gathering all required files for a DB server installation." distrib_folder=_distrib_tmp/mysql_${BUILD_MODE} mkdir -p $distrib_folder - cp -r share bin lib $distrib_folder + cp -ra share bin lib $distrib_folder echo `date +"%x %X"` "Gathering files for libmysqlclient API." - cp lib/libmysqlclient.so $distrib_folder/lib + cp -a lib/libmysqlclient.so* $distrib_folder/lib echo `date +"%x %X"` "Gathering files for Sparrow UDF plugin." mkdir -p $distrib_folder/lib/plugin @@ -73,7 +73,6 @@ generate_distrib_pack() { echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." cd $distrib_folder tar -czvf $3/$4.tar.gz * - # zip -r -3 $3/$4 * res=$? if [ $? 
-ne 0 ]; then echo `date +"%x %X"` "Tar gzip all files into a package failed." @@ -100,7 +99,7 @@ generate_mysqlapi_pack() { mkdir lib include cp -r ../../include/* include - cp ../../lib/libmysqlclient.so ../../lib/libmysqlclient.a lib + cp -a ../../lib/libmysqlclient.so* ../../lib/libmysqlclient.a lib echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." tar -czvf $3/$4.tar.gz * @@ -130,8 +129,8 @@ generate_sparrowapi_pack() { cd $distrib_folder mkdir lib include - cp ../../lib/libsparrowapi.so lib cp -r $SOURCE_ROOT_FOLDER/storage/sparrow/api/include/* include + cp -a ../../lib/libsparrowapi.so* lib echo `date +"%x %X"` "Packaging everything into the compressed file $3/$4." tar -czvf $3/$4.tar.gz * From 56e9ca0ec0ce152562e1f5e57879f99439a6cd58 Mon Sep 17 00:00:00 2001 From: Brendan Plougonven Date: Thu, 27 Feb 2025 13:53:35 +0100 Subject: [PATCH 8/8] AB#340942 Adapt Sparrow to libmysqlclient apis from MySQL 5.6 --- storage/sparrow/engine/condition.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/sparrow/engine/condition.cc b/storage/sparrow/engine/condition.cc index f217dc112150..070cb9695d3d 100644 --- a/storage/sparrow/engine/condition.cc +++ b/storage/sparrow/engine/condition.cc @@ -395,7 +395,7 @@ TimePeriods Condition::get(TABLE* table, Item* item, const bool returnAllIfNone) // STATIC uint64_t Condition::getBound(Item* item, const bool isTimestamp) { Item_result res_type = item->result_type(); - if (res_type == INT_RESULT) { + if (res_type == INT_RESULT || res_type == DECIMAL_RESULT) { if (item->val_int() < 0) return -1; return item->val_int() * 1000ULL; } else if (res_type == STRING_RESULT) {