Skip to content

Commit

Permalink
Minimum JSON implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmyleswhite committed Jan 6, 2014
1 parent e26d331 commit 439de4a
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 1 deletion.
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,15 @@ Wraps libraries for reading foreign file formats:

* Evan Miller's Stata, SPSS, ... reader
* Avik Sengupta's Excel reader
* As yet non-existent JSON reader
* JSON input/output

# Usage Example

using DataFrames
using DataFramesIO

s = """[{"id":1, "val":5.5}, {"id":2, "val": 6.6}]"""

df = json2df(s)
json = df2json(df)
df2 = json2df(json)
12 changes: 12 additions & 0 deletions src/DataFramesIO.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# DataFramesIO: converters between DataFrames and foreign file formats.
# Only the JSON converters (defined in json.jl) are wired up in this module.
module DataFramesIO
using DataArrays
using DataFrames
using JSON

# Public API; both functions are defined in json.jl.
export json2df, df2json
# Exports for converters that this module does not include yet.
# export xls2df, df2xls
# export stata2df, df2stata
# export spss2df, df2spss

# Pull in the json2df / df2json definitions.
include("json.jl")
end
51 changes: 51 additions & 0 deletions src/json.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# function tighttypes!(adf::AbstractDataFrame)
# nrows, ncols = size(adf)
# for j in 1:ncols
# T = None
# col = adf[j]
# for i in 1:nrows
# if !isna(col[i])
# T = typejoin(T, typeof(col[i]))
# end
# end
# adf[j] = convert(DataVector{T}, col)
# end
# return
# end

# Build a DataFrame from a JSON string holding an array of objects, one object
# per row.
#
# Arguments:
#   s -- JSON text whose top-level value is an array of objects.
#
# Returns a DataFrame with one row per array element. The columns are the keys
# of the FIRST object, sorted; every column is created with element type Any.
# An empty array yields an empty `DataFrame()`.
#
# Keys that appear only in later objects are ignored. A later object that lacks
# one of the first object's keys gets NA in that cell; indexing the object
# directly would raise a KeyError and abort the whole conversion.
function json2df(s::String) # -> DataFrame
    # TODO: Handle NA's properly
    # TODO: Optimize memory access
    # TODO: Implement and call tighttypes!(df)
    arrayofhashes = JSON.parse(s)
    nrows = length(arrayofhashes)
    if nrows == 0
        return DataFrame()
    end
    # Kept under the name `cnames` so the local does not shadow the `colnames`
    # function that is used elsewhere in this file.
    cnames = convert(Vector{UTF8String}, collect(keys(arrayofhashes[1])))
    sort!(cnames)
    ncols = length(cnames)
    df = DataFrame(repeat([Any], inner = [ncols]), cnames, nrows)
    for i in 1:nrows
        row = arrayofhashes[i]
        for j in 1:ncols
            # Ragged input: fall back to NA instead of throwing on an absent key.
            df[i, j] = get(row, cnames[j], NA)
        end
    end
    # tighttypes!(df)
    # NOTE(review): presumably rewrites keys that are not valid column names --
    # confirm against the DataFrames version in use.
    clean_colnames!(df)
    return df
end

# Serialize a data frame to a JSON string: an array with one object per row,
# each object mapping a column name to that row's cell value.
function df2json(adf::AbstractDataFrame) # -> UTF8String
    cnames = colnames(adf)
    nrows = size(adf, 1)
    ncols = size(adf, 2)
    records = Array(Dict{UTF8String, Any}, nrows)
    for r in 1:nrows
        # Fill the row's dictionary completely before storing it.
        record = Dict{UTF8String, Any}()
        for c in 1:ncols
            record[cnames[c]] = adf[r, c]
        end
        records[r] = record
    end
    return JSON.json(records)
end
36 changes: 36 additions & 0 deletions test/json.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Round-trip test for json2df / df2json using a small three-record fixture.
module TestJSON
using Base.Test
using DataFrames
using DataFramesIO

# Fixture: an array of three objects sharing the keys id, company, symbol, price.
s = """[
{
"id":1,
"company":"Telstra",
"symbol":"ASX:TLS",
"price":5.27
},
{
"id":2,
"company":"BHP",
"symbol":"ASX:BHP",
"price":37.77
},
{
"id":3,
"company":"Commonwealth Bank of Australia",
"symbol":"ASX:CBA",
"price":77.58
}
]"""

# Parse: expect one row per object and one column per key.
df = json2df(s)
@test isequal(size(df, 1), 3)
@test isequal(size(df, 2), 4)
# json2df sorts the keys, so the columns come back in alphabetical order.
@test isequal(colnames(df), ["company", "id", "price", "symbol"])
# Spot-check individual cells of the last record.
@test isequal(df[3, "id"], 3)
@test isequal(df[3, "price"], 77.58)
# Round trip: serializing and re-parsing should reproduce the same frame.
json = df2json(df)
df2 = json2df(json)
@test df == df2
end
16 changes: 16 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# Correctness Tests
#

using Base.Test
using DataFrames
using DataFramesIO

# Test files to run; each entry is handed to include() below.
my_tests = ["json.jl"]

@printf "Running tests:\n"

# Announce each test file, then execute it.
for my_test in my_tests
@printf " * %s\n" my_test
include(my_test)
end

0 comments on commit 439de4a

Please sign in to comment.