
Commit

Add support for specifying file extensions
davidanthoff committed May 18, 2018
1 parent 1c9a33d commit 2f5edbe
Showing 3 changed files with 56 additions and 26 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,2 +1,6 @@
+# VegaDatasets.jl v0.1.0 Release Notes
+* One can now specify an extension
+* More robust JSON loading
+
 # VegaDatasets.jl v0.0.1 Release Notes
 * Initial release
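
As a quick illustration of the new extension support, here is a minimal usage sketch (editorial addition, not part of the commit; the dataset names are the ones exercised in the test suite below):

```julia
using VegaDatasets

# Name without an extension: the loader looks for a matching
# .json file first, then a .csv file.
df = dataset("airports")

# Name with an explicit extension: the matching loader is used directly.
df_csv  = dataset("weather.csv")
df_json = dataset("weather.json")
```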
70 changes: 44 additions & 26 deletions src/VegaDatasets.jl
@@ -5,37 +5,55 @@ using DataFrames, JSON, TextParse, Missings
 
 export dataset
 
-function dataset(name::AbstractString)
-    json_filename = joinpath(@__DIR__,"..","data", "data", "$name.json")
-    csv_filename = joinpath(@__DIR__,"..","data","data", "$name.csv")
-    if isfile(json_filename)
-        json_data = JSON.parsefile(json_filename)
-
-        #Iterate over all JSON elements, get keys, then take distinct keys
-        colnames = unique(vcat([collect(keys(d)) for d in json_data]...))
-
-        #Get column types
-        coltypes = Type[]
-        for col in colnames
-            col_type = typeof(get(json_data[1], col, Missings.missing))
-            for row in 2:length(json_data)
-                col_type = Base.promote_type(col_type, typeof(get(json_data[row], col, Missings.missing)))
-            end
-            push!(coltypes, col_type)
+function load_json(filename)
+    json_data = JSON.parsefile(filename)
+
+    #Iterate over all JSON elements, get keys, then take distinct keys
+    colnames = unique(vcat([collect(keys(d)) for d in json_data]...))
+
+    #Get column types
+    coltypes = Type[]
+    for col in colnames
+        col_type = typeof(get(json_data[1], col, Missings.missing))
+        for row in 2:length(json_data)
+            col_type = Base.promote_type(col_type, typeof(get(json_data[row], col, Missings.missing)))
+        end
+        push!(coltypes, col_type)
     end
 
-        df = DataFrame(coltypes, convert(Vector{Symbol}, colnames), 0)
+    df = DataFrame(coltypes, convert(Vector{Symbol}, colnames), 0)
 
-        for row in json_data
-            push!(df, ( [get(row, col, Missings.missing) for col in colnames]... ))
-        end
-        return df
-    elseif isfile(csv_filename)
-        data, header = csvread(csv_filename)
+    for row in json_data
+        push!(df, ( [get(row, col, Missings.missing) for col in colnames]... ))
+    end
+    return df
+end
 
-        return DataFrame([data...], [Symbol(i) for i in header])
+function load_csv(filename)
+    data, header = csvread(filename)
+
+    return DataFrame([data...], [Symbol(i) for i in header])
+end
+
+function dataset(name::AbstractString)
+    if isfile(joinpath(@__DIR__,"..","data", "data", name))
+        if splitext(name)[2]==".csv"
+            return load_csv(joinpath(@__DIR__,"..","data", "data", name))
+        elseif splitext(name)[2]==".json"
+            return load_json(joinpath(@__DIR__,"..","data", "data", name))
+        else
+            error("Unknown dataset.")
+        end
     else
-        error("Unknown dataset.")
+        json_filename = joinpath(@__DIR__,"..","data", "data", "$name.json")
+        csv_filename = joinpath(@__DIR__,"..","data","data", "$name.csv")
+        if isfile(json_filename)
+            return load_json(json_filename)
+        elseif isfile(csv_filename)
+            return load_csv(csv_filename)
+        else
+            error("Unknown dataset.")
+        end
     end
 end
 
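The dispatch in the new `dataset` method hinges on `Base.splitext`. For reference, a short REPL sketch of its standard behavior on the two kinds of names (editorial addition):

```julia
julia> splitext("weather.csv")   # extension present: the second element drives the dispatch
("weather", ".csv")

julia> splitext("weather")       # no extension: name falls through to the old JSON/CSV lookup
("weather", "")
```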
8 changes: 8 additions & 0 deletions test/runtests.jl
@@ -14,4 +14,12 @@ df2 = dataset("airports")
 
 @test_throws ErrorException dataset("doesnotexist")
 
+df3 = dataset("weather.csv")
+
+@test isa(df3, DataFrame)
+
+df4 = dataset("weather.json")
+
+@test isa(df4, DataFrame)
+
 end
