Skip to content

Commit

Permalink
added readr.read_csv, data.tables.fread to experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
Damian Eads committed Jun 5, 2016
1 parent 037a503 commit cf7a8c4
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 0 deletions.
55 changes: 55 additions & 0 deletions bench/test_fread.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env Rscript
#
# test_fread.R in.csv out.json
#
# Loads the file in.csv into an R data frame with fread, sums its numeric
# columns, and outputs the run times and memory usage to the JSON file
# out.json.

require("data.table", quietly=TRUE)
require("rjson", quietly=TRUE)

# List the time-zone names available in the first zoneinfo directory found.
#
# Defines a global `OlsonNames` (presumably a stand-in for R versions that
# lack base::OlsonNames -- TODO confirm).
#
# On Windows the directory is $TZDIR, falling back to R's own share/zoneinfo.
# Elsewhere, $TZDIR, R's share/zoneinfo and a fixed list of system locations
# are probed in order and the first one that exists is used.
#
# Returns a character vector of the entries (searched recursively) whose
# relative path starts with an upper-case ASCII letter. If no candidate
# directory exists, warns and returns character().
OlsonNames <- function() {
  r_zoneinfo <- file.path(R.home("share"), "zoneinfo")

  if (.Platform$OS.type == "windows") {
    tzdir <- Sys.getenv("TZDIR", r_zoneinfo)
  } else {
    candidates <- c(
      Sys.getenv("TZDIR"),
      r_zoneinfo,
      "/usr/share/zoneinfo",
      "/usr/share/lib/zoneinfo",
      "/usr/lib/zoneinfo",
      "/usr/local/etc/zoneinfo",
      "/etc/zoneinfo",
      "/usr/etc/zoneinfo"
    )
    # An unset TZDIR yields "", which file.exists() rejects.
    present <- candidates[file.exists(candidates)]
    if (length(present) == 0L) {
      warning("no Olson database found")
      return(character())
    }
    tzdir <- present[1]
  }

  entries <- list.files(tzdir, recursive = TRUE)
  # Keep only names beginning with a capital letter.
  grep("^[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", entries, value = TRUE)
}

# Resident set size of the current R process.
#
# Shells out to `ps -o rss` for this process id and keeps only the last line
# of output (dropping the header). ps reports rss in 1024-byte units, so the
# value is scaled by 1024 to give bytes.
memory_usage <- function() {
  ps_command <- paste("ps -o rss ", Sys.getpid(), "| tail -1")
  rss_kib <- strtoi(system(ps_command, intern = TRUE))
  rss_kib * 1024
}

# ---- Benchmark driver ----
# Usage: test_fread.R in.csv out.json
#
# Times loading in.csv with fread, records the process memory right after the
# load, times a pass over every column, and writes the measurements as JSON.

args <- commandArgs(TRUE)
# Fail before doing any work: without this, a missing out.json argument is
# only noticed after the whole (possibly long) benchmark has run.
if (length(args) < 2) {
  stop("usage: test_fread.R in.csv out.json", call. = FALSE)
}
filename <- args[1]
result_filename <- args[2]

# Load phase.
load_tic <- Sys.time()
df <- fread(filename)
load_toc <- Sys.time()
load_time <- as.double(difftime(load_toc, load_tic, units = "secs"))

# Memory is sampled after the load so it includes the loaded table.
mem <- memory_usage()

# Sum phase: numeric columns are summed; every other column is reduced to its
# total character count so that all columns are touched.
sum_tic <- Sys.time()
s <- colSums(Filter(is.numeric, df))
# Kept separate from `s`: the two vectors describe different columns, so
# adding them would recycle mismatched lengths. This also keeps the timed
# work identical to the read.csv and readr benchmarks.
s2 <- apply(
  Filter(function(x) {
    !is.numeric(x)
  }, df),
  2,
  function(x) {
    sum(nchar(x))
  }
)
sum_toc <- Sys.time()
sum_time <- as.double(difftime(sum_toc, sum_tic, units = "secs"))

# Emit the measurements as a single JSON object.
results <- list(
  cmd = "R-fread",
  load_time = load_time,
  mem = mem,
  sum_time = sum_time
)
json <- rjson::toJSON(results)
write(json, result_filename)
33 changes: 33 additions & 0 deletions bench/test_readcsv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env Rscript
#
# test_readcsv.R in.csv out.json
#
# Loads the file in.csv into an R data frame. Sums its numeric
# columns. Outputs the run times and memory usage to the JSON file
# out.json.

require("rjson", quietly=TRUE)

# Resident set size of the current R process.
#
# Shells out to `ps -o rss` for this process id and keeps only the last line
# of output (dropping the header). ps reports rss in 1024-byte units, so the
# value is scaled by 1024 to give bytes.
memory_usage <- function() {
  ps_command <- paste("ps -o rss ", Sys.getpid(), "| tail -1")
  rss_kib <- strtoi(system(ps_command, intern = TRUE))
  rss_kib * 1024
}

# ---- Benchmark driver ----
# Usage: test_readcsv.R in.csv out.json
#
# Times loading in.csv with read.csv, records the process memory right after
# the load, times a pass over every column, and writes the measurements as
# JSON.

args <- commandArgs(TRUE)
# Fail before doing any work: without this, a missing out.json argument is
# only noticed after the whole (possibly long) benchmark has run.
if (length(args) < 2) {
  stop("usage: test_readcsv.R in.csv out.json", call. = FALSE)
}
filename <- args[1]
result_filename <- args[2]

# Load phase.
load_tic <- Sys.time()
df <- read.csv(filename)
load_toc <- Sys.time()
load_time <- as.double(difftime(load_toc, load_tic, units = "secs"))

# Memory is sampled after the load so it includes the loaded data frame.
mem <- memory_usage()

# Sum phase: numeric columns are summed; every other column is reduced to its
# total character count so that all columns are touched. The results are not
# reported; only the elapsed time is.
sum_tic <- Sys.time()
s <- colSums(Filter(is.numeric, df))
s2 <- apply(
  Filter(function(x) {
    !is.numeric(x)
  }, df),
  2,
  function(x) {
    sum(nchar(x))
  }
)
sum_toc <- Sys.time()
sum_time <- as.double(difftime(sum_toc, sum_tic, units = "secs"))

# Emit the measurements as a single JSON object.
results <- list(
  cmd = "R-readcsv",
  load_time = load_time,
  mem = mem,
  sum_time = sum_time
)
json <- rjson::toJSON(results)
write(json, result_filename)
54 changes: 54 additions & 0 deletions bench/test_readr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env Rscript
#
# test_readr.R in.csv out.json
#
# Loads the file in.csv into an R data frame with readr's read_csv, sums its
# numeric columns, and outputs the run times and memory usage to the JSON file
# out.json.

require("readr", quietly=TRUE)

# List the time-zone names available in the first zoneinfo directory found.
#
# Defines a global `OlsonNames` (presumably a stand-in for R versions that
# lack base::OlsonNames -- TODO confirm).
#
# On Windows the directory is $TZDIR, falling back to R's own share/zoneinfo.
# Elsewhere, $TZDIR, R's share/zoneinfo and a fixed list of system locations
# are probed in order and the first one that exists is used.
#
# Returns a character vector of the entries (searched recursively) whose
# relative path starts with an upper-case ASCII letter. If no candidate
# directory exists, warns and returns character().
OlsonNames <- function() {
  r_zoneinfo <- file.path(R.home("share"), "zoneinfo")

  if (.Platform$OS.type == "windows") {
    tzdir <- Sys.getenv("TZDIR", r_zoneinfo)
  } else {
    candidates <- c(
      Sys.getenv("TZDIR"),
      r_zoneinfo,
      "/usr/share/zoneinfo",
      "/usr/share/lib/zoneinfo",
      "/usr/lib/zoneinfo",
      "/usr/local/etc/zoneinfo",
      "/etc/zoneinfo",
      "/usr/etc/zoneinfo"
    )
    # An unset TZDIR yields "", which file.exists() rejects.
    present <- candidates[file.exists(candidates)]
    if (length(present) == 0L) {
      warning("no Olson database found")
      return(character())
    }
    tzdir <- present[1]
  }

  entries <- list.files(tzdir, recursive = TRUE)
  # Keep only names beginning with a capital letter.
  grep("^[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", entries, value = TRUE)
}

# Resident set size of the current R process.
#
# Shells out to `ps -o rss` for this process id and keeps only the last line
# of output (dropping the header). ps reports rss in 1024-byte units, so the
# value is scaled by 1024 to give bytes.
memory_usage <- function() {
  ps_command <- paste("ps -o rss ", Sys.getpid(), "| tail -1")
  rss_kib <- strtoi(system(ps_command, intern = TRUE))
  rss_kib * 1024
}

# ---- Benchmark driver ----
# Usage: test_readr.R in.csv out.json
#
# Times loading in.csv with readr's read_csv, records the process memory
# right after the load, times a pass over every column, and writes the
# measurements as JSON.

args <- commandArgs(TRUE)
# Fail before doing any work: without this, a missing out.json argument is
# only noticed after the whole (possibly long) benchmark has run.
if (length(args) < 2) {
  stop("usage: test_readr.R in.csv out.json", call. = FALSE)
}
filename <- args[1]
result_filename <- args[2]

# Load phase.
load_tic <- Sys.time()
df <- read_csv(filename)
load_toc <- Sys.time()
load_time <- as.double(difftime(load_toc, load_tic, units = "secs"))

# Memory is sampled after the load so it includes the loaded data frame.
mem <- memory_usage()

# Sum phase: numeric columns are summed; every other column is reduced to its
# total character count so that all columns are touched. The results are not
# reported; only the elapsed time is.
sum_tic <- Sys.time()
s <- colSums(Filter(is.numeric, df))
s2 <- apply(
  Filter(function(x) {
    !is.numeric(x)
  }, df),
  2,
  function(x) {
    sum(nchar(x))
  }
)
sum_toc <- Sys.time()
sum_time <- as.double(difftime(sum_toc, sum_tic, units = "secs"))

# Emit the measurements as a single JSON object.
results <- list(
  cmd = "R-readr",
  load_time = load_time,
  mem = mem,
  sum_time = sum_time
)
json <- rjson::toJSON(results)
write(json, result_filename)

0 comments on commit cf7a8c4

Please sign in to comment.