Skip to content

Commit

Permalink
[BugFix] fix hive scanner reading struct with char and varchar types (Sta…
Browse files Browse the repository at this point in the history
…rRocks#36258)

Signed-off-by: before-Sunrise <[email protected]>
  • Loading branch information
before-Sunrise authored Dec 4, 2023
1 parent f2f40ce commit f9a27ed
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 7 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ private void parse(StringScanner scanner) {
if (t.startsWith("decimal")) {
t = scanner.substr(end);
scanner.moveTo(end);
} else if (t.startsWith("char") || t.startsWith("varchar")) {
// Right now this is only used by the hive scanner.
// For char(xx) and varchar(xx), we only need t to be "char" or "varchar", so we skip the "(xx)" part;
// otherwise struct<c_char:char(30),c_varchar:varchar(200)> will produce a wrong result.
scanner.moveTo(end);
} else {
scanner.moveTo(p);
}
Expand Down
51 changes: 45 additions & 6 deletions test/sql/test_external_file/R/test_hive_jni_format
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
[UC]shell: avro_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/avro_format/"
-- result:
0
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/18b53aff8c6f422e87ef9e3b4a3219e5/avro_format/
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/6ba76853949b4f359e42a656a07b62be/avro_format/
-- !result
shell: ossutil64 mkdir ${avro_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
-- result:
Expand Down Expand Up @@ -58,8 +58,8 @@ select * from test_hive_avro_format where abs(col_float - 1.23) < 0.01 ;
-- !result
select col_tinyint,col_decimal,col_array from test_hive_avro_format;
-- result:
7 57.30 ["A","B","C"]
1 100.50 ["D","E","F"]
7 57.30 ["A","B","C"]
-- !result
shell: ossutil64 rm -rf ${avro_prefix[1]} >/dev/null || echo "exit 0" >/dev/null
-- result:
Expand All @@ -70,7 +70,7 @@ shell: ossutil64 rm -rf ${avro_prefix[1]} >/dev/null || echo "exit 0" >/dev/nul
[UC]shell: rcbinary_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/rcbinary_format/"
-- result:
0
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/ed0414200602437987c7771c55e98b05/rcbinary_format/
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/4c7b700ae19d46cfa0b4776b877d3aee/rcbinary_format/
-- !result
shell: ossutil64 mkdir ${rcbinary_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
-- result:
Expand Down Expand Up @@ -138,7 +138,7 @@ shell: ossutil64 rm -rf ${rcbinary_prefix[1]} >/dev/null || echo "exit 0" >/dev
[UC]shell: rctext_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/rctext_format/"
-- result:
0
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/ac2c404dfb8e4a17ace8e3aa6d2c0e47/rctext_format/
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/25370a4593f54c50b7aa7412f36288ed/rctext_format/
-- !result
shell: ossutil64 mkdir ${rctext_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
-- result:
Expand Down Expand Up @@ -206,7 +206,7 @@ shell: ossutil64 rm -rf ${rctext_prefix[1]} >/dev/null || echo "exit 0" >/dev/n
[UC]shell: sequence_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/sequence_format/"
-- result:
0
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/7847f5f081e54501a414380f71600937/sequence_format/
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/f54980c495c94ac39dafaf12529089e3/sequence_format/
-- !result
shell: ossutil64 mkdir ${sequence_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
-- result:
Expand Down Expand Up @@ -262,11 +262,50 @@ select * from test_hive_sequence_format where abs(col_float - 1.23) < 0.01 ;
-- !result
select col_tinyint,col_decimal,col_array from test_hive_sequence_format;
-- result:
7 57.30 ["A","B","C"]
1 100.50 ["D","E","F"]
7 57.30 ["A","B","C"]
-- !result
shell: ossutil64 rm -rf ${sequence_prefix[1]} >/dev/null || echo "exit 0" >/dev/null
-- result:
0

-- !result
-- name: testHiveStructCharAndVarchar
[UC]shell: struct_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/strcut/"
-- result:
0
oss://starrocks-sql-data-zhangjiakou/jiangyangjun/test_hive_format/2519b8d04d83440cbe09f5287fab7bf5/strcut/
-- !result
shell: ossutil64 mkdir ${struct_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
-- result:
0

-- !result
shell: ossutil64 cp --force ../be/test/exec/test_data/jni_scanner/test_hive_format/struct.seq ${struct_prefix[1]} | grep -Pv "(average|elapsed)"
-- result:
0

Succeed: Total num: 1, size: 15,266. OK num: 1(upload 1 files).
-- !result
CREATE TABLE `hive_hdfs_sequencefile_struct_mix_deflate` (
`col_int` int,
`col_string` varchar(1048576),
`col_struct` struct<c_int int(11), c_float float, c_double double, c_char char(30), c_varchar varchar(200), c_date date, c_timestamp datetime, c_boolean boolean>
)
ENGINE=file
PROPERTIES
(
"path" = "${struct_prefix[1]}",
"format" = "sequence"
);
-- result:
-- !result
select col_int,col_struct from hive_hdfs_sequencefile_struct_mix_deflate order by 1 limit 1;
-- result:
-2144975700 {"c_int":1102434235,"c_float":-9972.251,"c_double":18.68127,"c_char":"(115)699-5565x12614 ","c_varchar":"Mozilla/5.0 (compatible; MSIE 5.0; Windows 95; Trident/4.1)","c_date":"2000-01-29","c_timestamp":"2000-11-10 11:01:59","c_boolean":1}
-- !result
shell: ossutil64 rm -rf ${struct_prefix[1]} >/dev/null || echo "exit 0" >/dev/null
-- result:
0

-- !result
22 changes: 21 additions & 1 deletion test/sql/test_external_file/T/test_hive_jni_format
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,24 @@ select * from test_hive_sequence_format where col_string = 'world';
select * from test_hive_sequence_format where abs(col_float - 1.23) < 0.01 ;
select col_tinyint,col_decimal,col_array from test_hive_sequence_format;

shell: ossutil64 rm -rf ${sequence_prefix[1]} >/dev/null || echo "exit 0" >/dev/null
shell: ossutil64 rm -rf ${sequence_prefix[1]} >/dev/null || echo "exit 0" >/dev/null

-- name: testHiveStructCharAndVarchar
[UC]shell: struct_prefix=echo "oss://${oss_bucket}/test_hive_format/${uuid0}/strcut/"
shell: ossutil64 mkdir ${struct_prefix[1]} > /dev/null || echo "exit 0" >/dev/null
shell: ossutil64 cp --force ../be/test/exec/test_data/jni_scanner/test_hive_format/struct.seq ${struct_prefix[1]} | grep -Pv "(average|elapsed)"


CREATE TABLE `hive_hdfs_sequencefile_struct_mix_deflate` (
`col_int` int,
`col_string` varchar(1048576),
`col_struct` struct<c_int int(11), c_float float, c_double double, c_char char(30), c_varchar varchar(200), c_date date, c_timestamp datetime, c_boolean boolean>
)
ENGINE=file
PROPERTIES
(
"path" = "${struct_prefix[1]}",
"format" = "sequence"
);
select col_int,col_struct from hive_hdfs_sequencefile_struct_mix_deflate order by 1 limit 1;
shell: ossutil64 rm -rf ${struct_prefix[1]} >/dev/null || echo "exit 0" >/dev/null

0 comments on commit f9a27ed

Please sign in to comment.