Skip to content

Commit

Permalink
Merge pull request ClickHouse#60994 from bigo-sg/csv-tuple
Browse files Browse the repository at this point in the history
fix csv format not support tuple
  • Loading branch information
yakov-olkhovskiy authored Mar 27, 2024
2 parents 3fa6d23 + a8866db commit 257cdd8
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 65 deletions.
67 changes: 13 additions & 54 deletions src/DataTypes/Serializations/SerializationTuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <Common/assert_cast.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>


Expand Down Expand Up @@ -526,68 +527,26 @@ void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num

/// Serializes row `row_num` of the tuple column `column` into `ostr` for CSV output.
/// NOTE(review): this listing appears to be a rendered diff with its +/- markers stripped, so two
/// alternative bodies seem to sit back to back below; presumably only the second one belongs to
/// the committed code — confirm against the repository.
void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
/// First body (presumably the removed one): every element is written with its own
/// serializeTextCSV, with settings.csv.tuple_delimiter written between neighbouring elements.
for (size_t i = 0; i < elems.size(); ++i)
{
if (i != 0)
writeChar(settings.csv.tuple_delimiter, ostr);
elems[i]->serializeTextCSV(extractElementColumn(column, i), row_num, ostr, settings);
}
/// Second body (presumably the added one): render the tuple with serializeText into a
/// string-backed buffer, then hand the resulting string to writeCSV.
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
}

/// Reads one tuple value in CSV form from `istr` into `column`.
/// NOTE(review): this listing appears to be a rendered diff with its +/- markers stripped, so two
/// alternative bodies seem to sit back to back below; presumably only the second one belongs to
/// the committed code — confirm against the repository.
void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
/// First body (presumably the removed one): the elements are parsed one after another inside
/// the callback passed to addElementSafe.
addElementSafe<void>(elems.size(), column, [&]
{
const size_t size = elems.size();
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
{
/// Every element after the first must be preceded by settings.csv.tuple_delimiter;
/// whitespace on either side of the delimiter is skipped.
skipWhitespaceIfAny(istr);
assertChar(settings.csv.tuple_delimiter, istr);
skipWhitespaceIfAny(istr);
}

auto & element_column = extractElementColumn(column, i);
/// With settings.null_as_default set and an element column for which
/// isColumnNullableOrLowCardinalityNullable is false, parsing goes through
/// SerializationNullable's null-as-default CSV reader; otherwise the element's own
/// deserializeTextCSV is used.
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]);
else
elems[i]->deserializeTextCSV(element_column, istr, settings);
}
return true;
});
/// Second body (presumably the added one): read a single CSV value into a string with readCSV,
/// then parse that string with deserializeText through a ReadBufferFromString.
/// NOTE(review): the meaning of the trailing `true` argument is not visible in this listing —
/// TODO confirm against the deserializeText declaration.
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
deserializeText(column, rb, settings, true);
}

/// Non-asserting counterpart of deserializeTextCSV: tries to read one tuple value in CSV form
/// from `istr` into `column` and reports the outcome through the bool return value.
/// NOTE(review): this listing appears to be a rendered diff with its +/- markers stripped, so two
/// alternative bodies seem to sit back to back below. As listed, the first statement returns
/// unconditionally, which leaves the trailing statements unreachable; presumably only the second
/// body belongs to the committed code — confirm against the repository.
bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
/// First body (presumably the removed one): the elements are parsed one after another inside
/// the callback passed to addElementSafe, whose result is returned directly.
return addElementSafe<bool>(elems.size(), column, [&]
{
const size_t size = elems.size();
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
{
/// Every element after the first must be preceded by settings.csv.tuple_delimiter;
/// a failed checkChar makes the callback return false.
skipWhitespaceIfAny(istr);
if (!checkChar(settings.csv.tuple_delimiter, istr))
return false;
skipWhitespaceIfAny(istr);
}

auto & element_column = extractElementColumn(column, i);
/// Same reader selection as in deserializeTextCSV, but using the try* variants:
/// a false result from either reader makes the callback return false.
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
{
if (!SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]))
return false;
}
else
{
if (!elems[i]->tryDeserializeTextCSV(element_column, istr, settings))
return false;
}
}

return true;
});
/// Second body (presumably the added one): read a single CSV value into a string with
/// tryReadCSV (returning false if that fails), then return the result of tryDeserializeText
/// applied to that string through a ReadBufferFromString.
/// NOTE(review): the meaning of the trailing `true` argument is not visible in this listing —
/// TODO confirm against the tryDeserializeText declaration.
String s;
if (!tryReadCSV(s, istr, settings.csv))
return false;
ReadBufferFromString rb(s);
return tryDeserializeText(column, rb, settings, true);
}

void SerializationTuple::enumerateStreams(
Expand Down
4 changes: 2 additions & 2 deletions src/Formats/EscapingRuleUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,8 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
/// Try to determine the type of value inside quotes
auto type = tryInferDataTypeForSingleField(data, format_settings);

/// If we couldn't infer any type or it's tuple in quotes or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
if (!type || isTuple(type) || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
/// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
if (!type || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
return std::make_shared<DataTypeString>();

return type;
Expand Down
6 changes: 3 additions & 3 deletions tests/queries/0_stateless/00300_csv.reference
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline
"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline
here"
"x","y","z","a","b"
"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline
"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline
here"
"x","y","z","a","b"
"String","UInt8","Array(UInt8)","Tuple(UInt16, Array(String))","String"
"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline
"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline
here"
0,"0","[]","2000-01-01","2000-01-01 00:00:00"
1,"1","[0]","2000-01-02","2000-01-01 00:00:01"
Expand Down
Binary file modified tests/queries/0_stateless/00309_formats.reference
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/queries/0_stateless/00309_formats_case_insensitive.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- Tags: no-parallel

SELECT '-- test FORMAT clause --';
SET output_format_write_statistics = 0;
SELECT number, 'Hello & world' FROM numbers(3) FORMAT Tsv;
Expand Down
4 changes: 2 additions & 2 deletions tests/queries/0_stateless/01016_input_null_as_default.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE default_by_other_column (a Float32 DEFA

echo 'CSV'
echo '\N, 1, \N, "2019-07-22", "[10, 20, 30]", \N
1, world, 3, "2019-07-23", \N, tuple, 3.14
2, \N, 123, \N, "[]", test, 2.71828
1, world, 3, "2019-07-23", \N, "('\''tuple'\'', 3.14)"
2, \N, 123, \N, "[]", "('\''test'\'', 2.71828)"
3, \N, \N, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT CSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ c1 Array(Nullable(Bool))
[]
[NULL]
[false]
c1 Nullable(String)
(1, 2, 3)
c1 Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64))
(1,2,3)
c1 Nullable(String)
123.123
c1 Array(Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ CSV
c1 Nullable(UInt64)
c2 Nullable(String)
c3 Array(Nullable(UInt64))
c4 Nullable(UInt64)
c5 Nullable(String)
c4 Tuple(Nullable(UInt64), Nullable(String))
a Nullable(String)
b Nullable(String)
c Array(Nullable(String))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
c1 Nullable(String)
c2 Nullable(Int64)
c3 Array(Nullable(String))
c4 Map(String, Nullable(Int64))
c5 Tuple(Nullable(String), Nullable(Int64), Map(String, Nullable(Int64)))
20240305 1 ['s','d'] {'a':2} ('222',33,{'abc':5})
5 changes: 5 additions & 0 deletions tests/queries/0_stateless/02977_csv_format_support_tuple.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Tags: no-parallel

-- Round-trip check for tuples written to and read from a .csv file.
-- Step 1: write a single row holding a string, an integer, an array, a map and a tuple (which
-- itself contains a map) into 02977_1.csv. engine_file_truncate_on_insert=1 presumably makes
-- the insert replace any file left over from an earlier run — verify against the setting's docs.
insert into function file('02977_1.csv') select '20240305', 1, ['s', 'd'], map('a', 2), tuple('222', 33, map('abc', 5)) SETTINGS engine_file_truncate_on_insert=1;
-- Step 2: describe the file without giving a structure, so the printed column types are the
-- ones determined from the file contents (presumably via schema inference — confirm).
desc file('02977_1.csv');
-- Step 3: read the row back; max_threads=1 limits the query to a single thread.
select * from file('02977_1.csv') settings max_threads=1;

0 comments on commit 257cdd8

Please sign in to comment.