
Commit

Add support for specifying file extensions
davidanthoff committed May 18, 2018
1 parent 1c9a33d commit 2f5edbe
Showing 3 changed files with 56 additions and 26 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,2 +1,6 @@
+# VegaDatasets.jl v0.1.0 Release Notes
+* One can now specify an extension
+* More robust JSON loading
+
 # VegaDatasets.jl v0.0.1 Release Notes
 * Initial release
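
As a quick illustration of the new extension support, here is a minimal usage sketch (editorial addition, not part of the commit; the dataset names are the ones exercised in the test suite below):

```julia
using VegaDatasets

# Name without an extension: the loader looks for a matching
# .json file first, then a .csv file.
df = dataset("airports")

# Name with an explicit extension: the matching loader is used directly.
df_csv  = dataset("weather.csv")
df_json = dataset("weather.json")
```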
70 changes: 44 additions & 26 deletions src/VegaDatasets.jl
@@ -5,37 +5,55 @@ using DataFrames, JSON, TextParse, Missings
 
 export dataset
 
-function dataset(name::AbstractString)
-    json_filename = joinpath(@__DIR__,"..","data", "data", "$name.json")
-    csv_filename = joinpath(@__DIR__,"..","data","data", "$name.csv")
-    if isfile(json_filename)
-        json_data = JSON.parsefile(json_filename)
-
-        #Iterate over all JSON elements, get keys, then take distinct keys
-        colnames = unique(vcat([collect(keys(d)) for d in json_data]...))
-
-        #Get column types
-        coltypes = Type[]
-        for col in colnames
-            col_type = typeof(get(json_data[1], col, Missings.missing))
-            for row in 2:length(json_data)
-                col_type = Base.promote_type(col_type, typeof(get(json_data[row], col, Missings.missing)))
-            end
-            push!(coltypes, col_type)
+function load_json(filename)
+    json_data = JSON.parsefile(filename)
+
+    #Iterate over all JSON elements, get keys, then take distinct keys
+    colnames = unique(vcat([collect(keys(d)) for d in json_data]...))
+
+    #Get column types
+    coltypes = Type[]
+    for col in colnames
+        col_type = typeof(get(json_data[1], col, Missings.missing))
+        for row in 2:length(json_data)
+            col_type = Base.promote_type(col_type, typeof(get(json_data[row], col, Missings.missing)))
+        end
+        push!(coltypes, col_type)
     end
 
-        df = DataFrame(coltypes, convert(Vector{Symbol}, colnames), 0)
+    df = DataFrame(coltypes, convert(Vector{Symbol}, colnames), 0)
 
-        for row in json_data
-            push!(df, ( [get(row, col, Missings.missing) for col in colnames]... ))
-        end
-        return df
-    elseif isfile(csv_filename)
-        data, header = csvread(csv_filename)
+    for row in json_data
+        push!(df, ( [get(row, col, Missings.missing) for col in colnames]... ))
+    end
+    return df
+end
 
-        return DataFrame([data...], [Symbol(i) for i in header])
+function load_csv(filename)
+    data, header = csvread(filename)
+
+    return DataFrame([data...], [Symbol(i) for i in header])
+end
+
+function dataset(name::AbstractString)
+    if isfile(joinpath(@__DIR__,"..","data", "data", name))
+        if splitext(name)[2]==".csv"
+            return load_csv(joinpath(@__DIR__,"..","data", "data", name))
+        elseif splitext(name)[2]==".json"
+            return load_json(joinpath(@__DIR__,"..","data", "data", name))
+        else
+            error("Unknown dataset.")
+        end
     else
-        error("Unknown dataset.")
+        json_filename = joinpath(@__DIR__,"..","data", "data", "$name.json")
+        csv_filename = joinpath(@__DIR__,"..","data","data", "$name.csv")
+        if isfile(json_filename)
+            return load_json(json_filename)
+        elseif isfile(csv_filename)
+            return load_csv(csv_filename)
+        else
+            error("Unknown dataset.")
+        end
     end
 end
 
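The dispatch in the new `dataset` method hinges on `Base.splitext`. For reference, a short REPL sketch of its standard behavior on the two kinds of names (editorial addition):

```julia
julia> splitext("weather.csv")   # extension present: the second element drives the dispatch
("weather", ".csv")

julia> splitext("weather")       # no extension: name falls through to the old JSON/CSV lookup
("weather", "")
```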
8 changes: 8 additions & 0 deletions test/runtests.jl
@@ -14,4 +14,12 @@ df2 = dataset("airports")
 
 @test_throws ErrorException dataset("doesnotexist")
 
+df3 = dataset("weather.csv")
+
+@test isa(df3, DataFrame)
+
+df4 = dataset("weather.json")
+
+@test isa(df4, DataFrame)
+
 end
