-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path10-decoder-csv.t
148 lines (134 loc) · 4.8 KB
/
10-decoder-csv.t
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#! /usr/bin/env perl
use strict;
use warnings;
use Test::More;
use Try::Tiny;
use Log::Any '$log';
use Log::Any::Adapter 'TAP';
use Data::TableReader::Decoder::CSV;
my $csvmod;
plan skip_all => 'Need a CSV parser for this test'
unless try { $csvmod= Data::TableReader::Decoder::CSV->default_csv_module };
note "CSV decoder is ".$csvmod." version ".$csvmod->VERSION;
my $log_fn= sub { $log->can($_[0])->($log, $_[1]) };
sub test_basic {
my $input= ascii();
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
'CSV decoder' );
ok( my $iter= $d->iterator, 'got iterator' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row' );
is( $iter->row, 1, 'row=1' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
is_deeply( $iter->(), [ '1', '2', '3', '4' ], 'second row' );
is( $iter->row, 2, 'row=2' );
is_deeply( $iter->(), undef, 'no third row' );
is( $iter->row, 2, 'row=2' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
ok( $iter->seek(0), 'rewind' );
is( $iter->row, 0, 'row=0' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' );
is( $iter->row, 1, 'row=1' );
is_deeply( $iter->([2,1]), [ '3', '2' ], 'slice from second row' );
ok( !$iter->next_dataset, 'no next dataset' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
}
sub test_multi_iterator {
my $input= ascii();
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
'CSV decoder' );
ok( my $iter= $d->iterator, 'create first iterator' );
# This might be supported in the future, but for now ensure it dies
like( (try { $d->iterator } catch {$_}), qr/multiple iterator/i, 'error for multiple iterators' );
undef $iter; # release old iterator, freeing up the file handle to create a new one
ok( $iter= $d->iterator, 'new iterator' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' );
}
sub test_utf_bom {
for my $input_fn (qw( utf8_bom utf16_le_bom utf16_be_bom utf8_nobom deceptive_utf8_nobom )) {
subtest "seekable $input_fn" => sub {
my $input= main->$input_fn;
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
"CSV decoder for $input_fn" );
ok( my $iter= $d->iterator, 'got iterator' );
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
is_deeply( $iter->(), undef, 'no fourth row' );
ok( $iter->seek(0), 'rewind' );
# workaround for a perl bug! the input string gets corrupted
substr($input,0,8)= substr(main->$input_fn,0,8);
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->([0,3]), [ "\x{8A66}\x{3057}", 3 ], 'slice from second row' );
ok( !$iter->next_dataset, 'no next dataset' );
};
subtest "nonseekable $input_fn" => sub {
my $input= main->$input_fn;
pipe(my ($input_fh, $out_fh)) or die "pipe: $!";
print $out_fh $input or die "print(pipe_out): $!";
close $out_fh or die "close: $!";
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
"CSV decoder for $input_fn" );
if ($input_fn =~ /deceptive/) {
# Some inputs on non-seekable file handles will result in this exception.
# This is expected.
like( (try { $d->iterator } catch {$_}), qr/seek/, 'can\'t seek exception' );
} else {
ok( my $iter= $d->iterator, 'got iterator' );
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
is_deeply( $iter->(), undef, 'no fourth row' );
ok( !$iter->next_dataset, 'no next dataset' );
}
};
}
}
subtest basic => \&test_basic;
subtest multi_iter => \&test_multi_iterator;
subtest utf_bom => \&test_utf_bom;
done_testing;
sub ascii {
return <<END;
a,b,c,d
1,2,3,4
END
}
sub utf8_bom {
# BOM "test\n"
# "\x{8A66}\x{3057},1,2,3\n"
# "\x{27000}\n"
return "\xEF\xBB\xBF"
."test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}
sub utf16_le_bom {
return "\xFF\xFE"
."t\0e\0s\0t\0\n\0"
."\x66\x8A\x57\x30,\x001\x00,\x002\x00,\x003\x00\n\x00"
."\x5C\xD8\x00\xDC\n\0";
}
sub utf16_be_bom {
return "\xFE\xFF"
."\x00t\x00e\x00s\x00t\x00\n"
."\x8A\x66\x30\x57\x00,\x001\x00,\x002\x00,\x003\x00\n"
."\xD8\x5C\xDC\x00\0\n";
}
sub utf8_nobom {
return "test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}
sub deceptive_utf8_nobom {
return "\xEF\xBF\xBD"
."test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}