Minimum JSON implementation

johnmyleswhite · Jan 6, 2014 · 439de4a · 439de4a
1 parent e26d331
commit 439de4a
Show file tree

Hide file tree

Showing 5 changed files with 127 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -5,4 +5,15 @@ Wraps libraries for reading foreign file formats:
 
 * Evan Miller's Stata, SPSS, ... reader
 * Avik Sengupta's Excel reader
-* As yet non-existent JSON reader
+* JSON input/output
+
+# Usage Example
+
+    using DataFrames
+    using DataFramesIO
+
+    s = """[{"id": 1, "val": 5.5}, {"id": 2, "val": 6.6}]"""
+
+    df = json2df(s)
+    json = df2json(df)
+    df2 = json2df(json)
diff --git a/src/DataFramesIO.jl b/src/DataFramesIO.jl
@@ -0,0 +1,12 @@
+module DataFramesIO  # DataFrame <-> foreign-format converters; only the JSON pair is enabled here
+	using DataArrays
+	using DataFrames
+	using JSON
+
+	export json2df, df2json  # public JSON API; both are defined in json.jl
+	# export xls2df, df2xls
+	# export stata2df, df2stata
+	# export spss2df, df2spss
+
+	include("json.jl")  # the only implementation file included by this module
+end
diff --git a/src/json.jl b/src/json.jl
@@ -0,0 +1,51 @@
+# function tighttypes!(adf::AbstractDataFrame)
+#     nrows, ncols = size(adf)
+#     for j in 1:ncols
+#         T = None
+#         col = adf[j]
+#         for i in 1:nrows
+#             if !isna(col[i])
+#                 T = typejoin(T, typeof(col[i]))
+#             end
+#         end
+#         adf[j] = convert(DataVector{T}, col)
+#     end
+#     return
+# end
+
+# json2df(s) -> DataFrame
+#
+# Parse `s`, a JSON array of objects, into a DataFrame with one row per object.
+# Column names are the sorted keys of the FIRST object; keys that appear only
+# in later objects are ignored. Every column has element type Any until
+# tighttypes! is implemented. An empty JSON array yields an empty DataFrame.
+# A JSON null, or a key that is absent from a given object, becomes NA.
+function json2df(s::String) # -> DataFrame
+    # TODO: Optimize memory access
+    # TODO: Implement and call tighttypes!(df)
+    arrayofhashes = JSON.parse(s)
+    nrows = length(arrayofhashes)
+    if nrows == 0
+        return DataFrame()
+    end
+    cnames = convert(Vector{UTF8String}, collect(keys(arrayofhashes[1])))
+    sort!(cnames)
+    ncols = length(cnames)
+    df = DataFrame(repeat([Any], inner = [ncols]), cnames, nrows)
+    for i in 1:nrows
+        row = arrayofhashes[i]
+        for j in 1:ncols
+            # `get` avoids a KeyError on records that lack a key; JSON.parse
+            # yields `nothing` for null, so both cases collapse to NA.
+            value = get(row, cnames[j], nothing)
+            df[i, j] = value === nothing ? NA : value
+        end
+    end
+    # tighttypes!(df)
+    # NOTE(review): clean_colnames! comes from DataFrames; presumably it
+    # rewrites JSON keys that are not valid column names -- confirm.
+    clean_colnames!(df)
+    return df
+end
+
+# df2json(adf) -> UTF8String
+#
+# Serialize `adf` as a JSON array holding one object per row, keyed by column
+# name. NA cells are written as JSON null, which json2df reads back as NA.
+function df2json(adf::AbstractDataFrame) # -> UTF8String
+    nrows, ncols = size(adf)
+    cnames = colnames(adf)
+    arrayofhashes = Array(Dict{UTF8String, Any}, nrows)
+    for i in 1:nrows
+        row = Dict{UTF8String, Any}()
+        for j in 1:ncols
+            value = adf[i, j]
+            # NA has no JSON representation; `nothing` serializes as null.
+            # Compare by identity rather than isna(), because a cell may hold
+            # an array and isna() on an array is not a single Bool.
+            row[cnames[j]] = value === NA ? nothing : value
+        end
+        arrayofhashes[i] = row
+    end
+    return JSON.json(arrayofhashes)
+end
diff --git a/test/json.jl b/test/json.jl
@@ -0,0 +1,36 @@
+module TestJSON
+    using Base.Test
+    using DataFrames
+    using DataFramesIO
+
+    # Three flat records that all share the same four keys.
+    payload = """[
+    {
+      "id":1,
+      "company":"Telstra",
+      "symbol":"ASX:TLS",
+      "price":5.27
+    },
+    {
+      "id":2,
+      "company":"BHP",
+      "symbol":"ASX:BHP",
+      "price":37.77
+    },
+    {
+      "id":3,
+      "company":"Commonwealth Bank of Australia",
+      "symbol":"ASX:CBA",
+      "price":77.58
+    }
+    ]"""
+
+    # Parsing: one row per record, columns named by the sorted keys.
+    parsed = json2df(payload)
+    @test isequal(size(parsed, 1), 3)
+    @test isequal(size(parsed, 2), 4)
+    @test isequal(colnames(parsed), ["company", "id", "price", "symbol"])
+
+    # Spot-check individual cells of the last record.
+    @test isequal(parsed[3, "id"], 3)
+    @test isequal(parsed[3, "price"], 77.58)
+
+    # Round trip: serializing and re-parsing must reproduce the same frame.
+    serialized = df2json(parsed)
+    reparsed = json2df(serialized)
+    @test parsed == reparsed
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -0,0 +1,16 @@
+#
+# Correctness Tests
+#
+
+using Base.Test
+using DataFrames
+using DataFramesIO
+
+# Test files to run, in order.
+test_files = ["json.jl"]
+
+@printf "Running tests:\n"
+
+# Announce each file before including it so a failure is easy to attribute.
+for test_file in test_files
+    @printf " * %s\n" test_file
+    include(test_file)
+end