Skip to content

Commit

Permalink
erasure-code: store and compare encoded contents
Browse files Browse the repository at this point in the history
Introduce ceph_erasure_code_non_regression to check and compare how an
erasure code plugin encodes and decodes content with a given set of
parameters. For instance:

./ceph_erasure_code_non_regression \
      --plugin jerasure \
      --parameter technique=reed_sol_van \
      --parameter k=2 \
      --parameter m=2 \
      --stripe-width 3181 \
      --create \
      --check

Will create an encoded object (--create) and store it into a directory
along with the chunks, one chunk per file. The directory name is derived
from the parameters. The content of the object is a random pattern of 37
bytes repeated to fill the object size specified with --stripe-width.

The check function (--check) reads the object back from the file,
encodes it and compares the result with the content of the chunks read
from the files. It also attempts to recover from one or two erasures.

Chunks encoded by a given version of Ceph are expected to be encoded
exactly in the same way by all Ceph versions going forward.

http://tracker.ceph.com/issues/9420 Refs: ceph#9420

Signed-off-by: Loic Dachary <[email protected]>
Loic Dachary authored and ldachary committed Nov 10, 2014
1 parent 4b07381 commit f590130
Showing 3 changed files with 334 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ Makefile
/ceph_multi_stress_watch
/ceph_erasure_code
/ceph_erasure_code_benchmark
/ceph_erasure_code_non_regression
/ceph_psim
/ceph_radosacl
/ceph_rgw_jsonparser
8 changes: 8 additions & 0 deletions src/test/erasure-code/Makefile.am
Original file line number Diff line number Diff line change
@@ -13,6 +13,14 @@ bin_DEBUGPROGRAMS += ceph_erasure_code_benchmark
noinst_HEADERS += \
test/erasure-code/ceph_erasure_code_benchmark.h

# ceph_erasure_code_non_regression: creates and checks encoded chunks
# stored on disk. It is added to noinst_PROGRAMS (automake: built but
# not installed) and links with -ldl on Linux only.
ceph_erasure_code_non_regression_SOURCES = \
test/erasure-code/ceph_erasure_code_non_regression.cc
ceph_erasure_code_non_regression_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL)
if LINUX
ceph_erasure_code_non_regression_LDADD += -ldl
endif
noinst_PROGRAMS += ceph_erasure_code_non_regression

ceph_erasure_code_SOURCES = \
test/erasure-code/ceph_erasure_code.cc
ceph_erasure_code_LDADD = $(LIBOSD) $(LIBCOMMON) $(BOOST_PROGRAM_OPTIONS_LIBS) $(CEPH_GLOBAL)
325 changes: 325 additions & 0 deletions src/test/erasure-code/ceph_erasure_code_non_regression.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph distributed storage system
*
* Red Hat (C) 2014 Red Hat <[email protected]>
*
* Author: Loic Dachary <[email protected]>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
*/

#include <errno.h>
#include <boost/scoped_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/program_options/option.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/program_options/cmdline.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/algorithm/string.hpp>

#include "global/global_context.h"
#include "global/global_init.h"
#include "common/errno.h"
#include "common/ceph_argparse.h"
#include "common/config.h"
#include "erasure-code/ErasureCodePlugin.h"

namespace po = boost::program_options;
using namespace std;

/**
 * Command line tool that creates (--create) an erasure coded object
 * on disk, one chunk per file, and/or checks (--check) that the
 * chunks found on disk match what the plugin encodes now.
 *
 * setup() must be called before run(); it fills every member from
 * the command line.
 */
class ErasureCodeNonRegression {
  unsigned stripe_width;          // size of the buffer to be encoded
  string plugin;                  // erasure code plugin name
  bool create;                    // --create was given
  bool check;                     // --check was given
  string base;                    // prefix of all paths
  string directory;               // where the content and chunks are stored
  map<string,string> parameters;  // key=value pairs given to the plugin
public:
  // Give the scalar members a defined value: setup() may return
  // early (help, invalid arguments) before assigning them.
  ErasureCodeNonRegression() :
    stripe_width(0),
    create(false),
    check(false)
  {}

  // Parse the command line; returns 0 on success, non zero otherwise.
  int setup(int argc, char** argv);
  // Run the create and/or check steps; returns 0 on success.
  int run();
  // Write the object and its encoded chunks in the directory.
  int run_create();
  // Compare the chunks in the directory with freshly encoded chunks.
  int run_check();
  // Decode the erased chunks from the others and compare the result
  // with the matching entries of chunks. erasures and chunks are
  // taken by value.
  int decode_erasures(ErasureCodeInterfaceRef erasure_code,
                      set<int> erasures,
                      map<int,bufferlist> chunks);
  // Path of the file holding the object content.
  string content_path();
  // Path of the file holding the given chunk.
  string chunk_path(unsigned int chunk);
};

int ErasureCodeNonRegression::setup(int argc, char** argv) {

po::options_description desc("Allowed options");
desc.add_options()
("help,h", "produce help message")
("stripe-width,s", po::value<int>()->default_value(4 * 1024),
"stripe_width, i.e. the size of the buffer to be encoded")
("plugin,p", po::value<string>()->default_value("jerasure"),
"erasure code plugin name")
("base", po::value<string>()->default_value("."),
"prefix all paths with base")
("parameter,P", po::value<vector<string> >(),
"parameters")
("create", "create the erasure coded content in the directory")
("check", "check the content in the directory matches the chunks and vice versa")
;

po::variables_map vm;
po::parsed_options parsed =
po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
po::store(
parsed,
vm);
po::notify(vm);

vector<const char *> ceph_options, def_args;
vector<string> ceph_option_strings = po::collect_unrecognized(
parsed.options, po::include_positional);
ceph_options.reserve(ceph_option_strings.size());
for (vector<string>::iterator i = ceph_option_strings.begin();
i != ceph_option_strings.end();
++i) {
ceph_options.push_back(i->c_str());
}

global_init(
&def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT,
CODE_ENVIRONMENT_UTILITY,
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
g_ceph_context->_conf->apply_changes(NULL);

if (vm.count("help")) {
cout << desc << std::endl;
return 1;
}

stripe_width = vm["stripe-width"].as<int>();
plugin = vm["plugin"].as<string>();
base = vm["base"].as<string>();
check = vm.count("check") > 0;
create = vm.count("create") > 0;

if (!check && !create) {
cerr << "must specifify either --check or --create" << endl;
return 1;
}

{
stringstream path;
path << base << "/" << "plugin=" << plugin << " stipe-width=" << stripe_width;
directory = path.str();
}

if (vm.count("parameter")) {
const vector<string> &p = vm["parameter"].as< vector<string> >();
for (vector<string>::const_iterator i = p.begin();
i != p.end();
++i) {
std::vector<std::string> strs;
boost::split(strs, *i, boost::is_any_of("="));
if (strs.size() != 2) {
cerr << "--parameter " << *i << " ignored because it does not contain exactly one =" << endl;
} else {
parameters[strs[0]] = strs[1];
}
if (strs[0] != "directory")
directory += " " + *i;
}
}
if (parameters.count("directory") == 0)
parameters["directory"] = ".libs";

return 0;
}

/**
 * Run the steps requested on the command line: create first, then
 * check. Stops at the first step that fails.
 *
 * @return 0 on success or the non zero value returned by the
 *         failing step
 */
int ErasureCodeNonRegression::run()
{
  if (create) {
    const int status = run_create();
    if (status != 0)
      return status;
  }
  if (check) {
    const int status = run_check();
    if (status != 0)
      return status;
  }
  return 0;
}

int ErasureCodeNonRegression::run_create()
{
ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
ErasureCodeInterfaceRef erasure_code;
stringstream messages;
int code = instance.factory(plugin, parameters, &erasure_code, messages);
if (code) {
cerr << messages.str() << endl;
return code;
}

if (::mkdir(directory.c_str(), 0755)) {
cerr << "mkdir(" << directory << "): " << cpp_strerror(errno) << endl;
return 1;
}
unsigned payload_chunk_size = 37;
string payload;
for (unsigned j = 0; j < payload_chunk_size; ++j)
payload.push_back('a' + (rand() % 26));
bufferlist in;
for (unsigned j = 0; j < stripe_width; j += payload_chunk_size)
in.append(payload);
if (stripe_width < in.length())
in.splice(stripe_width, in.length() - stripe_width);
if (in.write_file(content_path().c_str()))
return 1;
set<int> want_to_encode;
for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) {
want_to_encode.insert(i);
}
map<int,bufferlist> encoded;
code = erasure_code->encode(want_to_encode, in, &encoded);
if (code)
return code;
for (map<int,bufferlist>::iterator chunk = encoded.begin();
chunk != encoded.end();
chunk++) {
if (chunk->second.write_file(chunk_path(chunk->first).c_str()))
return 1;
}
return 0;
}

/**
 * Decode the chunks listed in erasures using only the other chunks
 * and compare each of them with the matching entry of chunks.
 *
 * @param erasure_code the plugin instance used to decode
 * @param erasures the chunks that must not be given to decode
 * @param chunks all the chunks, including the erased ones
 * @return 0 on success, the error returned by decode or 1 if a
 *         decoded chunk differs from the expected one
 */
int ErasureCodeNonRegression::decode_erasures(ErasureCodeInterfaceRef erasure_code,
                                              set<int> erasures,
                                              map<int,bufferlist> chunks)
{
  // start from all the chunks and drop the erased ones
  map<int,bufferlist> available(chunks);
  for (set<int>::iterator lost = erasures.begin();
       lost != erasures.end();
       ++lost)
    available.erase(*lost);

  map<int,bufferlist> decoded;
  const int r = erasure_code->decode(erasures, available, &decoded);
  if (r != 0)
    return r;

  for (set<int>::iterator lost = erasures.begin();
       lost != erasures.end();
       ++lost) {
    bufferlist &expected = chunks[*lost];
    bufferlist &recovered = decoded[*lost];
    if (expected.contents_equal(recovered))
      continue;
    cerr << "chunk " << *lost << " incorrectly recovered" << endl;
    return 1;
  }
  return 0;
}

int ErasureCodeNonRegression::run_check()
{
ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
ErasureCodeInterfaceRef erasure_code;
stringstream messages;
int code = instance.factory(plugin, parameters, &erasure_code, messages);
if (code) {
cerr << messages.str() << endl;
return code;
}
string errors;
bufferlist in;
if (in.read_file(content_path().c_str(), &errors)) {
cerr << errors << endl;
return 1;
}
set<int> want_to_encode;
for (unsigned int i = 0; i < erasure_code->get_chunk_count(); i++) {
want_to_encode.insert(i);
}

map<int,bufferlist> encoded;
code = erasure_code->encode(want_to_encode, in, &encoded);
if (code)
return code;

for (map<int,bufferlist>::iterator chunk = encoded.begin();
chunk != encoded.end();
chunk++) {
bufferlist existing;
if (existing.read_file(chunk_path(chunk->first).c_str(), &errors)) {
cerr << errors << endl;
return 1;
}
bufferlist &old = chunk->second;
if (existing.length() != old.length() ||
memcmp(existing.c_str(), old.c_str(), old.length())) {
cerr << "chunk " << chunk->first << " encodes differently" << endl;
return 1;
}
}

// erasing a single chunk is likely to use a specific code path in every plugin
set<int> erasures;
erasures.clear();
erasures.insert(0);
code = decode_erasures(erasure_code, erasures, encoded);
if (code)
return code;

if (erasure_code->get_chunk_count() - erasure_code->get_data_chunk_count() > 1) {
// erasing two chunks is likely to be the general case
erasures.clear();
erasures.insert(0);
erasures.insert(erasure_code->get_chunk_count() - 1);
code = decode_erasures(erasure_code, erasures, encoded);
if (code)
return code;
}

return 0;
}

/**
 * @return the path of the file holding the object content, i.e.
 *         "content" in the directory
 */
string ErasureCodeNonRegression::content_path()
{
  return directory + "/content";
}

/**
 * @param chunk the chunk number
 * @return the path of the file holding the chunk, i.e. a file named
 *         after the chunk number in the directory
 */
string ErasureCodeNonRegression::chunk_path(unsigned int chunk)
{
  ostringstream name;
  name << directory << '/' << chunk;
  return name.str();
}

int main(int argc, char** argv) {
ErasureCodeNonRegression non_regression;
int err = non_regression.setup(argc, argv);
if (err)
return err;
return non_regression.run();
}

/*
* Local Variables:
* compile-command: "cd ../.. ; make -j4 &&
* make ceph_erasure_code_non_regression &&
* libtool --mode=execute valgrind --tool=memcheck --leak-check=full \
* ./ceph_erasure_code_non_regression \
* --plugin jerasure \
* --parameter directory=.libs \
* --parameter technique=reed_sol_van \
* --parameter k=2 \
* --parameter m=2 \
* --base /tmp/ceph_erasure_code_non_regression \
* --stripe-width 3181 \
* --create \
* --check
* "
* End:
*/

0 comments on commit f590130

Please sign in to comment.