Skip to content

Commit

Permalink
apacheGH-15247: [R] Error when trying to save a data.frame with NULL …
Browse files Browse the repository at this point in the history
…column names (apache#34798)

Before:
```
df <- data.frame(a=1:3,b=4:6)
names(df) <- NULL
arrow::write_feather(x=df, sink="~/tmp/df")
#> Error: Invalid input type, expected 'character' actual 'NULL'
arrow::write_csv_arrow(df, "tst.csv")
#> Error in `value[[3L]]()`:
#> ! x must be an object of class 'data.frame', 'RecordBatch', 'Dataset', 'Table', or 'RecordBatchReader' not 'data.frame'.

#> Backtrace:
#>     ▆
#>  1. └─arrow::write_csv_arrow(df, "tst.csv")
#>  2.   └─base::tryCatch(...)
#>  3.     └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  4.       └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  5.         └─value[[3L]](cond)
#>  6.           └─rlang::abort(...)
arrow::write_parquet(df, "tst.csv")
#> Error: Invalid input type, expected 'character' actual 'NULL'
```
After:

``` r
df <- data.frame(a=1:3,b=4:6)
names(df) <- NULL
arrow::write_feather(x=df, sink="~/tmp/df")
#> Error in `arrow::write_feather()`:
#> ! Input data frame columns must be named
#> ℹ Column names are NULL

#> Backtrace:
#>      ▆
#>   1. └─arrow::write_feather(x = df, sink = "~/tmp/df")
#>   2.   └─arrow:::as_writable_table(x) at r/R/feather.R:124:2
#>   3.     ├─base::tryCatch(...) at r/R/util.R:150:2
#>   4.     │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   5.     │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>   6.     │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   7.     ├─arrow::as_arrow_table(x) at r/R/util.R:150:2
#>   8.     └─arrow:::as_arrow_table.data.frame(x) at r/R/table.R:284:2
#>   9.       └─arrow:::check_named_cols(x) at r/R/table.R:324:2
#>  10.         └─rlang::abort(...) at r/R/util.R:255:4
arrow::write_csv_arrow(df, "tst.csv")
#> Error in `arrow::write_csv_arrow()`:
#> ! Input data frame columns must be named
#> ℹ Column names are NULL

#> Backtrace:
#>     ▆
#>  1. └─arrow::write_csv_arrow(df, "tst.csv")
#>  2.   └─base::tryCatch(...) at r/R/csv.R:782:4
#>  3.     └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  4.       └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  5.         └─value[[3L]](cond)
#>  6.           └─rlang::abort(conditionMessage(e), parent = NA) at r/R/csv.R:786:10
arrow::write_parquet(df, "tst.csv")
#> Error in `arrow::write_parquet()`:
#> ! Input data frame columns must be named
#> ℹ Column names are NULL

#> Backtrace:
#>      ▆
#>   1. └─arrow::write_parquet(df, "tst.csv")
#>   2.   └─arrow:::as_writable_table(x) at r/R/parquet.R:164:2
#>   3.     ├─base::tryCatch(...) at r/R/util.R:150:2
#>   4.     │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   5.     │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>   6.     │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   7.     ├─arrow::as_arrow_table(x) at r/R/util.R:150:2
#>   8.     └─arrow:::as_arrow_table.data.frame(x) at r/R/table.R:284:2
#>   9.       └─arrow:::check_named_cols(x) at r/R/table.R:324:2
#>  10.         └─rlang::abort(...) at r/R/util.R:255:4
```

* Closes: apache#15247

Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
  • Loading branch information
thisisnic authored Mar 31, 2023
1 parent 68dc40a commit 8efb43d
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 6 deletions.
15 changes: 10 additions & 5 deletions r/R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -782,12 +782,17 @@ write_csv_arrow <- function(x,
tryCatch(
x <- as_record_batch_reader(x),
error = function(e) {
abort(
paste0(
"x must be an object of class 'data.frame', 'RecordBatch', ",
"'Dataset', 'Table', or 'RecordBatchReader' not '", class(x)[1], "'."
if (grepl("Input data frame columns must be named", conditionMessage(e))) {
abort(conditionMessage(e), parent = NA)
} else {
abort(
paste0(
"x must be an object of class 'data.frame', 'RecordBatch', ",
"'Dataset', 'Table', or 'RecordBatchReader' not '", class(x)[1], "'."
),
parent = NA
)
)
}
}
)
}
Expand Down
1 change: 1 addition & 0 deletions r/R/dataset-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ write_dataset <- function(dataset,
# partitioning vars need to be in the `select` schema
dataset <- ensure_group_vars(dataset)
} else {
check_named_cols(dataset)
if (inherits(dataset, "grouped_df")) {
force(partitioning)
# Drop the grouping metadata before writing; we've already consumed it
Expand Down
1 change: 1 addition & 0 deletions r/R/record-batch-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ as_record_batch_reader.RecordBatch <- function(x, ...) {
#' @rdname as_record_batch_reader
#' @export
as_record_batch_reader.data.frame <- function(x, ...) {
check_named_cols(x)
RecordBatchReader$create(as_record_batch(x))
}

Expand Down
1 change: 1 addition & 0 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ as_arrow_table.RecordBatch <- function(x, ..., schema = NULL) {
#' @rdname as_arrow_table
#' @export
as_arrow_table.data.frame <- function(x, ..., schema = NULL) {
check_named_cols(x)
Table$create(x, schema = schema)
}

Expand Down
13 changes: 12 additions & 1 deletion r/R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ recycle_scalars <- function(arrays) {
is_scalar <- arr_lens == 1

if (length(arrays) > 1 && any(is_scalar) && !all(is_scalar)) {

# Recycling not supported for tibbles and data.frames
if (all(map_lgl(arrays, ~ inherits(.x, "data.frame")))) {
abort(c(
Expand Down Expand Up @@ -250,3 +249,15 @@ augment_io_error_msg <- function(e, call, schema = NULL, format = NULL) {
handle_augmented_field_misuse(msg, call)
abort(msg, call = call)
}

check_named_cols <- function(df) {
if (inherits(df, "data.frame") && is.null(names(df))) {
abort(
c(
"Input data frame columns must be named",
i = "Column names are NULL"
),
call = caller_env(n = 3)
)
}
}
6 changes: 6 additions & 0 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -705,3 +705,9 @@ test_that("can create empty table from schema", {
expect_equal(nrow(out), 0)
expect_equal(out$schema, schema)
})

test_that("as_arrow_table() errors on data.frame with NULL names", {
df <- data.frame(a = 1, b = "two")
names(df) <- NULL
expect_error(as_arrow_table(df), "Input data frame columns must be named")
})
6 changes: 6 additions & 0 deletions r/tests/testthat/test-dataset-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -806,3 +806,9 @@ test_that("Can delete filesystem dataset after write_dataset", {
unlink(dataset_dir2, recursive = TRUE)
expect_false(dir.exists(dataset_dir2))
})

test_that("write_dataset() errors on data.frame with NULL names", {
df <- data.frame(a = 1, b = "two")
names(df) <- NULL
expect_error(write_dataset(df, tempfile()), "Input data frame columns must be named")
})
6 changes: 6 additions & 0 deletions r/tests/testthat/test-record-batch-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,9 @@ test_that("as_record_batch_reader() works for function", {
"Expected fun\\(\\) to return batch with schema 'a: string'"
)
})

test_that("as_record_batch_reader() errors on data.frame with NULL names", {
df <- data.frame(a = 1, b = "two")
names(df) <- NULL
expect_error(as_record_batch_reader(df), "Input data frame columns must be named")
})

0 comments on commit 8efb43d

Please sign in to comment.