Skip to content

Commit

Permalink
write to s3 method
Browse files Browse the repository at this point in the history
  • Loading branch information
tmastny committed Nov 23, 2019
1 parent 129faf1 commit 604ec66
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 16 deletions.
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ export(batch_predict)
export(default_bucket)
export(download_file)
export(install_sagemaker)
export(s3_path)
export(read_s3)
export(s3)
export(sagemaker_attach_tuner)
export(sagemaker_delete_endpoint)
export(sagemaker_deploy_endpoint)
Expand All @@ -21,4 +22,5 @@ export(sagemaker_tuning_job_logs)
export(sagemaker_xgb_container)
export(sagemaker_xgb_estimator)
export(upload_file)
export(write_s3)
importFrom(magrittr,"%>%")
46 changes: 34 additions & 12 deletions R/s3.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,35 @@ default_bucket <- function() {
}

#' Upload a local file to an S3 object
#'
#' Splits `s3_path` into its bucket and key components and uploads the
#' file with the boto3 S3 client.
#'
#' @param file Path to the local file to upload.
#' @param s3_path Full S3 destination path, e.g. as built by [s3()].
#' @return Invisibly, the boto3 return value (side-effect function).
#' @export
upload_file <- function(file, s3_path) {
  s3 <- boto3$client('s3')

  # s3_bucket_key_extract() returns the bucket and key components of the
  # path (see its usage throughout R/s3.R)
  s3_components <- s3_bucket_key_extract(s3_path)
  invisible(s3$upload_file(file, s3_components$bucket, s3_components$key))
}


#' Download an S3 object to a local file
#'
#' Shells out to the AWS CLI (`aws s3 cp`), which must be installed and
#' configured on the host.
#'
#' @param s3_path Full S3 source path, e.g. as built by [s3()].
#' @param file Local destination path.
#' @return Invisibly, the exit status from [system()].
#' @export
download_file <- function(s3_path, file) {
  # shQuote() protects against spaces and shell metacharacters in
  # either path being interpreted by the shell
  invisible(system(
    paste0(
      "aws s3 cp ",
      shQuote(s3_path), " ",
      shQuote(file)
    )
  ))
}

# downloads s3 object into the R session
# ... passes to readr::read_delim
#' @export
read_s3 <- function(s3_path_name, delim = ",", col_names = FALSE, ...) {
s3_components <- s3_bucket_key_extract(s3_path_name)
read_s3 <- function(s3_path, delim = ",", col_names = FALSE, ...) {
s3_components <- s3_bucket_key_extract(s3_path)

io <- reticulate::import("io")
s3 <- boto3$client('s3')

# https://datasciencechronicles.com.au/2017/11/12/adventures-in-python-1/
file <- io$BytesIO()
s3$download_fileobj(s3_components$bucket, s3_components$key, file)

Expand All @@ -47,13 +47,35 @@ read_s3 <- function(s3_path_name, delim = ",", col_names = FALSE, ...) {
s3_obj
}

#' Write a data frame to S3 as delimited text
#'
#' Serializes `x` in memory with [readr::format_delim()] and uploads the
#' result directly via boto3, without writing an intermediate local file.
#'
#' @param x A data frame to write.
#' @param s3_path Full S3 destination path, e.g. as built by [s3()].
#' @param delim Field delimiter, passed to [readr::format_delim()].
#' @param col_names Should a header row be written? Defaults to `FALSE`
#'   (matching `read_s3()`).
#' @param ... Additional arguments passed to [readr::format_delim()].
#' @export
write_s3 <- function(x, s3_path, delim = ",", col_names = FALSE, ...) {
  s3_components <- s3_bucket_key_extract(s3_path)

  io <- reticulate::import("io")
  builtin <- reticulate::import_builtins()
  s3 <- boto3$client('s3')

  # Wrap the UTF-8 encoded payload in a Python BytesIO so boto3 can
  # stream it with upload_fileobj().
  # https://datasciencechronicles.com.au/2017/11/12/adventures-in-python-1/
  file <- io$BytesIO(builtin$bytes(
    readr::format_delim(
      x, delim = delim, col_names = col_names, ...
    ),
    "utf-8"
  ))

  s3$upload_fileobj(file, s3_components$bucket, s3_components$key)
}




#' Build an S3 path from components
#'
#' Joins the components with `/` and prepends the `s3://` scheme.
#' The prefix is applied exactly once: if the first component already
#' starts with `s3://` (e.g. an existing `s3` path is passed back in),
#' it is not prefixed again, so the result never contains `s3://s3://`.
#'
#' @param ... Character path components, e.g. a bucket name and key.
#' @return A character path with S3 class `"s3"`.
#' @export
s3 <- function(...) {
  path <- file.path(..., fsep = "/")

  # strip any existing scheme before adding it, making the function
  # idempotent (addresses the double-transform TODO in R/xgboost.R)
  path <- paste0("s3://", sub("^s3://", "", path))

  class(path) <- c("s3", class(path))
  path
}

Expand Down
6 changes: 3 additions & 3 deletions R/xgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ sagemaker_estimator <- function(
role = sagemaker_get_execution_role(),
train_instance_count = 1L,
train_instance_type = "ml.m4.xlarge",
output_path = s3_path(default_bucket(), "models/"),
output_path = s3(default_bucket(), "models/"),
sagemaker_session = sagemaker$Session()

) {
Expand Down Expand Up @@ -403,8 +403,8 @@ batch_predict <- function(
logs = FALSE
)

# TODO: make s3_path generic so it knows how
# to take a s3_path object and not double
# TODO: make s3 generic so it knows how
# to take an s3 object and not double
# transform it like "s3://s3://".
# I think there are some examples in Shiny
# or htmltools.
Expand Down
10 changes: 10 additions & 0 deletions vignettes/random-cv2.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ sagemaker_has_endpoint(model)
read_s3(s3(default_bucket(), "abalone-train.csv"))
```

```{r}
sagemaker::abalone %>%
write_s3(s3(default_bucket(), "test-upload.csv"))
```

```{r}
read_s3(s3(default_bucket(), "test-upload.csv"))
```



```{r}
tuner <- sagemaker$tuner$HyperparameterTuner$attach("xgboost-191114-2052")
Expand Down

0 comments on commit 604ec66

Please sign in to comment.