Skip to content

Commit

Permalink
ARROW-1396: [C++] Add PrettyPrint for schemas that outputs dictionaries
Browse files Browse the repository at this point in the history
Author: Wes McKinney <[email protected]>

Closes apache#1051 from wesm/ARROW-1396 and squashes the following commits:

11a8b69 [Wes McKinney] Fix cpplint error
81637ea [Wes McKinney] clang-format
dfd4692 [Wes McKinney] Use PrettyPrint with schemas in Python
43652bc [Wes McKinney] Add PrettyPrint functions for Schema that also print dictionaries
f097675 [Wes McKinney] First draft of PrettyPrint for schemas, untested. Name DataType::name a virtual
  • Loading branch information
wesm committed Sep 6, 2017
1 parent e9f3a12 commit a3514a3
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 71 deletions.
40 changes: 39 additions & 1 deletion cpp/src/arrow/pretty_print-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ void CheckArray(const Array& arr, int indent, const char* expected) {
ASSERT_EQ(result, ss.str());
}

/// Pretty-print `obj` into a string using `options`, then assert that the
/// produced text is exactly equal to the NUL-terminated string `expected`.
template <typename T>
void Check(const T& obj, const PrettyPrintOptions& options, const char* expected) {
  std::string actual;
  ASSERT_OK(PrettyPrint(obj, options, &actual));

  // Build the reference string once so both operands are std::string.
  const std::string wanted(expected);
  ASSERT_EQ(wanted, actual);
}

template <typename TYPE, typename C_TYPE>
void CheckPrimitive(int indent, const std::vector<bool>& is_valid,
const std::vector<C_TYPE>& values, const char* expected) {
Expand Down Expand Up @@ -117,7 +124,38 @@ TEST_F(TestPrettyPrint, DictionaryType) {
-- dictionary: ["foo", "bar", "baz"]
-- indices: [1, 2, null, 0, 2, 0])expected";

CheckArray(*arr.get(), 0, expected);
CheckArray(*arr, 0, expected);
}

// Pretty-printing a schema must also print the values of every
// dictionary-typed field, including those nested inside list and struct types.
TEST_F(TestPrettyPrint, SchemaWithDictionary) {
  // Dictionary values shared by every dictionary-typed field below.
  std::shared_ptr<Array> dict;
  std::vector<std::string> dict_values = {"foo", "bar", "baz"};
  ArrayFromVector<StringType, std::string>(dict_values, &dict);

  auto simple = field("one", int32());
  auto simple_dict = field("two", dictionary(int16(), dict));
  auto list_of_dict = field("three", list(simple_dict));

  auto struct_with_dict = field("four", struct_({simple, simple_dict}));

  auto sch = schema({simple, simple_dict, list_of_dict, struct_with_dict});

  // Compared for exact string equality by Check, so whitespace inside the
  // literal is significant.
  static const char* expected = R"expected(one: int32
two: dictionary<values=string, indices=int16, ordered=0>
dictionary: ["foo", "bar", "baz"]
three: list<two: dictionary<values=string, indices=int16, ordered=0>>
child 0, two: dictionary<values=string, indices=int16, ordered=0>
dictionary: ["foo", "bar", "baz"]
four: struct<one: int32, two: dictionary<values=string, indices=int16, ordered=0>>
child 0, one: int32
child 1, two: dictionary<values=string, indices=int16, ordered=0>
dictionary: ["foo", "bar", "baz"])expected";

  PrettyPrintOptions options{0};

  Check(*sch, options, expected);
}

} // namespace arrow
148 changes: 117 additions & 31 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,56 @@

namespace arrow {

class ArrayPrinter {
/// \brief Shared plumbing for the printers in this file.
///
/// Stores the output stream pointer and the current indentation width, and
/// offers small helpers for emitting text, line breaks and array brackets.
class PrettyPrinter {
 public:
  PrettyPrinter(int indent, std::ostream* sink) : indent_(indent), sink_(sink) {}

  /// Emit `data` to the sink exactly as given.
  void Write(const char* data) { *sink_ << data; }
  void Write(const std::string& data) { *sink_ << data; }

  /// Emit the current indentation, then `data`.
  void WriteIndented(const char* data) {
    Indent();
    Write(data);
  }
  void WriteIndented(const std::string& data) {
    Indent();
    Write(data);
  }

  /// Emit a line break and indent the freshly started line.
  void Newline() {
    Write("\n");
    Indent();
  }

  /// Emit `indent_` single spaces; a non-positive width emits nothing.
  void Indent() {
    for (int remaining = indent_; remaining > 0; --remaining) {
      Write(" ");
    }
  }

  /// Emit the opening / closing bracket of an array.
  void OpenArray() { Write("["); }
  void CloseArray() { Write("]"); }

 protected:
  int indent_;
  std::ostream* sink_;
};

class ArrayPrinter : public PrettyPrinter {
public:
ArrayPrinter(const Array& array, int indent, std::ostream* sink)
: array_(array), indent_(indent), sink_(sink) {}
: PrettyPrinter(indent, sink), array_(array) {}

template <typename T>
inline typename std::enable_if<IsInteger<T>::value, void>::type WriteDataValues(
Expand Down Expand Up @@ -136,13 +182,6 @@ class ArrayPrinter {
}
}

void Write(const char* data);
void Write(const std::string& data);
void Newline();
void Indent();
void OpenArray();
void CloseArray();

Status Visit(const NullArray& array) { return Status::OK(); }

template <typename T>
Expand Down Expand Up @@ -250,9 +289,6 @@ class ArrayPrinter {

private:
const Array& array_;
int indent_;

std::ostream* sink_;
};

Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
Expand All @@ -269,24 +305,6 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
}
}

void ArrayPrinter::OpenArray() { (*sink_) << "["; }
void ArrayPrinter::CloseArray() { (*sink_) << "]"; }

void ArrayPrinter::Write(const char* data) { (*sink_) << data; }

void ArrayPrinter::Write(const std::string& data) { (*sink_) << data; }

void ArrayPrinter::Newline() {
(*sink_) << "\n";
Indent();
}

void ArrayPrinter::Indent() {
for (int i = 0; i < indent_; ++i) {
(*sink_) << " ";
}
}

Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
ArrayPrinter printer(arr, indent, sink);
return printer.Print();
Expand All @@ -302,8 +320,76 @@ Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
return Status::OK();
}

Status ARROW_EXPORT DebugPrint(const Array& arr, int indent) {
// Convenience wrapper: pretty-print `arr` to std::cout with the given indent.
Status DebugPrint(const Array& arr, int indent) {
  std::ostream* const out = &std::cout;
  return PrettyPrint(arr, indent, out);
}

/// \brief Writes a textual form of a Schema, with each top-level field
/// starting on its own line.
class SchemaPrinter : public PrettyPrinter {
 public:
  SchemaPrinter(const Schema& schema, int indent, std::ostream* sink)
      : PrettyPrinter(indent, sink), schema_(schema) {}

  /// Write the type's string form followed by its dictionary or child fields.
  Status PrintType(const DataType& type);

  /// Write the field's name, a ": " separator, and then its type.
  Status PrintField(const Field& field);

  /// Print all fields of the schema in order. A line break is emitted between
  /// consecutive fields, but not before the first or after the last one.
  Status Print() {
    const auto field_count = schema_.num_fields();
    for (int pos = 0; pos < field_count; ++pos) {
      const bool is_first = (pos == 0);
      if (!is_first) {
        Newline();
      }
      RETURN_NOT_OK(PrintField(*schema_.field(pos)));
    }
    return Status::OK();
  }

 private:
  const Schema& schema_;
};

// Write `type` as text. A dictionary type is followed by a "dictionary: " line
// holding its values; any other type is followed by one "child N, " line per
// child field, printed recursively. Nested lines are written with the indent
// temporarily raised by two.
Status SchemaPrinter::PrintType(const DataType& type) {
  Write(type.ToString());

  if (type.id() == Type::DICTIONARY) {
    const auto& dict_type = static_cast<const DictionaryType&>(type);

    Newline();
    indent_ += 2;
    WriteIndented("dictionary: ");
    RETURN_NOT_OK(PrettyPrint(*dict_type.dictionary(), indent_, sink_));
    indent_ -= 2;
    return Status::OK();
  }

  const auto child_count = type.num_children();
  for (int child_idx = 0; child_idx < child_count; ++child_idx) {
    Newline();

    // Label that precedes the child's own "name: type" text.
    std::stringstream label;
    label << "child " << child_idx << ", ";

    indent_ += 2;
    WriteIndented(label.str());
    RETURN_NOT_OK(PrintField(*type.child(child_idx)));
    indent_ -= 2;
  }
  return Status::OK();
}

// Write "<name>: " for `field`, then hand its type over to PrintType.
Status SchemaPrinter::PrintField(const Field& field) {
  (*sink_) << field.name() << ": ";

  const DataType& field_type = *field.type();
  return PrintType(field_type);
}

// Print `schema` to `sink`, starting at the indentation given in `options`.
Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
                   std::ostream* sink) {
  return SchemaPrinter(schema, options.indent, sink).Print();
}

// Print `schema` into an in-memory stream and, on success, store the produced
// text in `*result`. `*result` is left untouched if printing fails.
Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
                   std::string* result) {
  std::ostringstream buffer;
  std::ostream* stream = &buffer;
  RETURN_NOT_OK(PrettyPrint(schema, options, stream));

  *result = buffer.str();
  return Status::OK();
}

} // namespace arrow
21 changes: 18 additions & 3 deletions cpp/src/arrow/pretty_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define ARROW_PRETTY_PRINT_H

#include <ostream>
#include <string>

#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
Expand All @@ -32,10 +33,24 @@ struct PrettyPrintOptions {
int indent;
};

Status ARROW_EXPORT PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink);
Status ARROW_EXPORT PrettyPrint(const Array& arr, int indent, std::ostream* sink);
/// \brief Print human-readable representation of RecordBatch
ARROW_EXPORT
Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink);

Status ARROW_EXPORT DebugPrint(const Array& arr, int indent);
/// \brief Print human-readable representation of Array
ARROW_EXPORT
Status PrettyPrint(const Array& arr, int indent, std::ostream* sink);

/// \brief Print human-readable representation of Schema to the stream `sink`,
/// including the values of dictionary-typed fields
ARROW_EXPORT
Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
std::ostream* sink);

/// \brief Print human-readable representation of Schema into the string
/// `result`
ARROW_EXPORT
Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
std::string* result);

/// \brief Print human-readable representation of Array to standard output
ARROW_EXPORT
Status DebugPrint(const Array& arr, int indent);

} // namespace arrow

Expand Down
Loading

0 comments on commit a3514a3

Please sign in to comment.