Skip to content

Commit

Permalink
Tune SQLite
Browse files Browse the repository at this point in the history
This change makes sure the b-tree page size isn't 1024 bytes. It also enables
WAL mode. This means TensorBoard can perform reads at the same time as
TensorFlow is performing writes.

We now also fsync() less often. This shouldn't carry any risk of database
corruption in WAL mode. Since WAL mode uses shared memory, writes become
immediately available to other processes, but they won't become durable until
after the OS decides to flush the FS cache.

This makes the DB writer faster than the file writer, at least in cases where
the DB is tiny. We can probably make it go faster still, once we find a way to
use transactions.

Name                      Cold µs   Average µs  Flushing µs       Size B
?i.i                        1,920           69            0            0
Scalar 1.0 FS               1,623          337        4,258       11,348
Scalar 1.0 TB FS            3,137          527        4,213       17,023
Scalar 2.0 FS               3,319          681        3,917       11,348
Scalar 2.0 DB               2,601          578          217      118,784
Tensor 1.0 FS 4             6,397          558        4,276       14,215
Tensor 2.0 FS 4             1,678          613        3,971       24,455
Tensor 2.0 DB 4             3,605          278          313      118,784
Tensor 1.0 FS 128           1,857          289        4,397       47,111
Tensor 2.0 FS 128           3,558          721       10,894       57,351
Tensor 2.0 DB 128           3,508          585          203      118,784
Tensor 1.0 FS 8192          2,677          525        4,400    2,119,816
Tensor 2.0 FS 8192          2,248          822        4,006    2,130,056
Tensor 2.0 DB 8192          4,346          370          449      126,976

PiperOrigin-RevId: 178666363
  • Loading branch information
jart authored and tensorflower-gardener committed Dec 11, 2017
1 parent 80ac330 commit db198b8
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 9 deletions.
1 change: 1 addition & 0 deletions tensorflow/core/kernels/summary_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class CreateSummaryDbWriterOp : public OpKernel {
SummaryWriterInterface* s;
auto db = Sqlite::Open(db_uri);
OP_REQUIRES_OK(ctx, db.status());
db.ValueOrDie()->UseWriteAheadLogWithReducedDurabilityIfPossible();
OP_REQUIRES_OK(
ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name,
run_name, user_name, ctx->env(), &s));
Expand Down
57 changes: 51 additions & 6 deletions tensorflow/core/lib/db/sqlite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,36 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace {

// Runs a single SQL statement against `db` on a best-effort basis.
//
// The statement is prepared, stepped and reset in one expression. A
// failure is never propagated to the caller; it is only reported as a
// WARNING in the log.
void ExecuteOrLog(Sqlite* db, const char* sql) {
  const Status outcome = db->Prepare(sql).StepAndReset();
  if (outcome.ok()) {
    return;
  }
  LOG(WARNING) << outcome.ToString();
}

// Runs `sql` against `db` and returns column 0 of the first result row.
//
// Returns the empty string when the step fails or when the statement
// finishes without yielding a row.
string ExecuteOrEmpty(Sqlite* db, const char* sql) {
  auto query = db->Prepare(sql);
  bool finished = false;
  const bool stepped_ok = query.Step(&finished).ok();
  if (!stepped_ok || finished) {
    return "";
  }
  return query.ColumnString(0);
}

} // namespace

/* static */
// Opens the SQLite database named by `uri` and hands back a shared Sqlite
// wrapper around the connection, or the error Status derived from the
// sqlite3_open result code.
//
// NOTE(review): this text is a diff rendering without +/- markers, so two
// bodies appear back to back below. The page reports 6 deletions for this
// file; the first body (through `return s;`) looks like the removed code and
// the second like its replacement -- confirm against the repository.
xla::StatusOr<std::shared_ptr<Sqlite>> Sqlite::Open(const string& uri) {
sqlite3* sqlite = nullptr;
// Presumably the pre-change body: builds the wrapper with the one-argument
// constructor and does not touch any pragma.
Status s = MakeStatus(sqlite3_open(uri.c_str(), &sqlite));
if (s.ok()) {
return std::shared_ptr<Sqlite>(new Sqlite(sqlite));
}
return s;
// Presumably the post-change body: returns early on an open error, keeps the
// URI on the wrapper, and requests a 4096-byte page size.
// NOTE(review): the SQLite docs say the handle should be released with
// sqlite3_close() even when sqlite3_open fails; neither body does that on
// the error path -- TODO confirm whether this leaks.
TF_RETURN_IF_ERROR(MakeStatus(sqlite3_open(uri.c_str(), &sqlite)));
Sqlite* db = new Sqlite(sqlite, uri);
// This is the SQLite default since 2016. However it's good to set
// this anyway, since we might get linked against an older version of
// the library, and it's pretty much impossible to change later.
// Best effort: ExecuteOrLog only logs a warning if the pragma fails, so
// Open still returns the database in that case.
ExecuteOrLog(db, "PRAGMA page_size=4096");
return std::shared_ptr<Sqlite>(db);
}

/* static */ Status Sqlite::MakeStatus(int resultCode) {
Expand Down Expand Up @@ -75,7 +96,7 @@ xla::StatusOr<std::shared_ptr<Sqlite>> Sqlite::Open(const string& uri) {
}
}

// NOTE(review): two constructor definitions appear here because this text is
// a diff rendering without +/- markers; the one-argument form looks like the
// pre-change line -- confirm against the repository.
// Stores the raw connection handle only.
Sqlite::Sqlite(sqlite3* db) : db_(db) {}
// Stores the raw connection handle together with a copy of `uri`; uri_ is
// read when the WAL-mode warning is logged.
Sqlite::Sqlite(sqlite3* db, const string& uri) : db_(db), uri_(uri) {}

Sqlite::~Sqlite() {
// close_v2 doesn't care if a stmt hasn't been GC'd yet
Expand All @@ -97,6 +118,30 @@ Status Sqlite::Close() {
return s;
}

void Sqlite::UseWriteAheadLogWithReducedDurabilityIfPossible() {
// TensorFlow summaries are intensively write-heavy, cf. most apps.
// This pragma loves writes and means that TensorBoard can read the
// database even as the training job inserts stuff. In other words,
// this makes SQLite almost as powerful as MySQL or PostgreSQL.
// https://www.sqlite.org/wal.html
string journal = ExecuteOrEmpty(this, "PRAGMA journal_mode=wal");
if (journal != "wal") {
LOG(WARNING) << "Failed to set journal_mode=wal because SQLite wants "
<< uri_ << " to be in '" << journal << "' mode, which might "
<< "be bad since WAL is important for the performance of "
<< "write-intensive apps. This might only happen for memory "
<< "databases or old versions of SQLite, but is definitely "
<< "worth fixing if that's not the case";
} else {
// This setting means we might lose transactions due to power loss,
// but the database can't become corrupted. In exchange, we get the
// the performance of a NoSQL database. This is a trade-off most data
// scientists would consider acceptable.
// https://www.sqlite.org/pragma.html#pragma_synchronous
ExecuteOrLog(this, "PRAGMA synchronous=NORMAL");
}
}

SqliteStatement Sqlite::Prepare(const string& sql) {
sqlite3_stmt* stmt = nullptr;
int rc = sqlite3_prepare_v2(db_, sql.c_str(), sql.size() + 1, &stmt, nullptr);
Expand Down
14 changes: 11 additions & 3 deletions tensorflow/core/lib/db/sqlite.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_LIB_DB_SQLITE_H_
#define TENSORFLOW_CORE_LIB_DB_SQLITE_H_

#include <stddef.h>
#include <cstddef>
#include <memory>
#include <utility>

Expand Down Expand Up @@ -69,6 +69,13 @@ class Sqlite {
/// beforehand. This is a no-op if already closed
Status Close();

/// \brief Enables WAL mode with fewer fsync calls, or logs a warning.
///
/// The synchronous pragma is only set to NORMAL if WAL mode was
/// successfully enabled. This must be called immediately after
/// creating the object.
void UseWriteAheadLogWithReducedDurabilityIfPossible();

/// \brief Creates SQLite statement.
///
/// Call result.status() to determine whether or not this operation
Expand All @@ -78,8 +85,9 @@ class Sqlite {
SqliteStatement Prepare(const string& sql);

private:
explicit Sqlite(sqlite3* db);
explicit Sqlite(sqlite3* db, const string& uri);
sqlite3* db_;
string uri_;
TF_DISALLOW_COPY_AND_ASSIGN(Sqlite);
};

Expand All @@ -103,7 +111,7 @@ class SqliteStatement {
SqliteStatement& operator=(SqliteStatement&& other);

/// \brief Returns true if statement is not empty.
// NOTE(review): both forms appear because this text is a diff rendering
// without +/- markers; the non-explicit form looks like the pre-change
// line -- confirm against the repository.
operator bool() const { return stmt_ != nullptr; }
// `explicit` limits the conversion to boolean contexts and explicit casts,
// so a statement cannot silently convert to an integer.
explicit operator bool() const { return stmt_ != nullptr; }

/// \brief Returns SQLite result code state.
///
Expand Down

0 comments on commit db198b8

Please sign in to comment.