Skip to content

Commit

Permalink
readdlm: handle CRLF line endings after a quoted column
Browse files Browse the repository at this point in the history
  • Loading branch information
tanmaykm committed Apr 3, 2014
1 parent 055d929 commit e76eccf
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
21 changes: 12 additions & 9 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,11 @@ function dlm_dims{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, ign_adj_dlm::Bool, al
try
slen = sizeof(dbuff)
col_start_idx = 1
was_cr = false
while idx <= slen
val,idx = next(dbuff, idx)
is_eol = (val == eol)
is_cr = (eol == '\n') && (val == '\r')
is_dlm = is_eol ? false : is_default_dlm ? in(val, _default_delims) : (val == dlm)
is_quote = (val == qchar)

Expand All @@ -220,7 +222,7 @@ function dlm_dims{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, ign_adj_dlm::Bool, al
elseif is_eol
nrows += 1
col += 1
offidx = store_column(offsets, nrows, col, false, col_start_idx, idx-2, offidx)
offidx = store_column(offsets, nrows, col, false, col_start_idx, idx - (was_cr ? 3 : 2), offidx)
col_start_idx = idx
ncols = max(ncols, col)
col = 0
Expand All @@ -230,7 +232,7 @@ function dlm_dims{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, ign_adj_dlm::Bool, al
is_quote && (state = 3)
elseif 0 == state # begin field
if is_quote
state = allow_quote ? 1 : 2
state = (allow_quote && !was_cr) ? 1 : 2
expct_col = false
elseif is_dlm
if !ign_adj_dlm
Expand All @@ -243,20 +245,20 @@ function dlm_dims{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, ign_adj_dlm::Bool, al
nrows += 1
if expct_col
col += 1
offidx = store_column(offsets, nrows, col, false, col_start_idx, idx-2, offidx)
offidx = store_column(offsets, nrows, col, false, col_start_idx, idx - (was_cr ? 3 : 2), offidx)
end
col_start_idx = idx
ncols = max(ncols, col)
col = 0
expct_col = false
else
elseif !is_cr
state = 2
expct_col = false
end
elseif 3 == state # second quote
if is_quote
if is_quote && !was_cr
state = 1
elseif is_dlm
elseif is_dlm && !was_cr
state = 0
col += 1
offidx = store_column(offsets, nrows+1, col, true, col_start_idx, idx-2, offidx)
Expand All @@ -265,16 +267,17 @@ function dlm_dims{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, ign_adj_dlm::Bool, al
elseif is_eol
nrows += 1
col += 1
offidx = store_column(offsets, nrows, col, true, col_start_idx, idx-2, offidx)
offidx = store_column(offsets, nrows, col, true, col_start_idx, idx - (was_cr ? 3 : 2), offidx)
col_start_idx = idx
ncols = max(ncols, col)
col = 0
state = 0
else
error_str = "unexpected character '$(char(val))' after quoted field at row $(nrows+1) column $(col+1)"
elseif (is_cr && was_cr) || !is_cr
error_str = escape_string("unexpected character '$(char(val))' after quoted field at row $(nrows+1) column $(col+1)")
break
end
end
was_cr = is_cr
end

if isempty(error_str)
Expand Down
2 changes: 2 additions & 0 deletions test/readdlm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ end
# CRLF-terminated rows: the result has one row per line and 4 columns,
# including when a later row has fewer fields than the first.
@test size(readcsv(IOBuffer("1,2,3,4\r\n"))) == (1,4)
@test size(readcsv(IOBuffer("1,2,3,4\r\n1,2,3\r\n"))) == (2,4)
@test size(readcsv(IOBuffer("1,2,3,4\r\n1,2,3,4\r\n"))) == (2,4)
# Quoted last field followed directly by CRLF.
@test size(readcsv(IOBuffer("1,2,3,\"4\"\r\n1,2,3,4\r\n"))) == (2,4)

# Space- and tab-separated input; the last row is short and has no trailing newline.
@test size(readdlm(IOBuffer("1 2 3 4\n1 2 3"))) == (2,4)
@test size(readdlm(IOBuffer("1\t2 3 4\n1 2 3"))) == (2,4)
Expand Down Expand Up @@ -49,6 +50,7 @@ let result1 = reshape({1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, ""}, 2, 4)
@test isequaldlm(readdlm(IOBuffer("1,2,3,4\n1,2,3\n"), ','), result1, Any)
@test isequaldlm(readdlm(IOBuffer("1,2,3,4\n1,2,3"), ','), result1, Any)
@test isequaldlm(readdlm(IOBuffer("1,2,3,4\r\n1,2,3\r\n"), ','), result1, Any)
@test isequaldlm(readdlm(IOBuffer("1,2,3,\"4\"\r\n1,2,3\r\n"), ','), result1, Any)
end

let result1 = reshape({"abc", "hello", "def,ghi", " \"quote\" ", "new\nline", "world"}, 2, 3),
Expand Down

0 comments on commit e76eccf

Please sign in to comment.