Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backported header and BOM fix from parseLineIterator and added dynamic_features_spec #19

Merged
merged 1 commit into from
May 30, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
backported header and BOM fix from parseLineIterator and added dynamic_features_spec
FourierTransformer committed May 28, 2018

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 2b998a234f6972cc430d0a4c960a0139b8d7aeb4
23 changes: 21 additions & 2 deletions ftcsv.lua
Original file line number Diff line number Diff line change
@@ -295,6 +295,24 @@ local function parseString(inputString, inputLength, delimiter, i, headerField,
return outResults
end

-- Builds the list of headers that actually appear in the parsed output,
-- as opposed to the per-column header mapping.
--   headerField:  array of header names, one per column (may repeat)
--   fieldsToKeep: nil to keep every header, or a table whose truthy
--                 entries (keyed by header name) mark headers to keep
-- Returns a new array holding each kept header once, in the order of
-- its first occurrence in headerField.
local function determineRealHeaders(headerField, fieldsToKeep)
    local realHeaders = {}
    local seen = {}
    for column = 1, #headerField do
        local name = headerField[column]
        -- a header is emitted once, and only when no filter is given
        -- or the filter explicitly lists it
        if not seen[name] and (fieldsToKeep == nil or fieldsToKeep[name]) then
            realHeaders[#realHeaders + 1] = name
            seen[name] = true
        end
    end
    return realHeaders
end

-- runs the show!
function ftcsv.parse(inputFile, delimiter, options)
-- delimiter MUST be one character
@@ -373,7 +391,7 @@ function ftcsv.parse(inputFile, delimiter, options)

-- for files where there aren't headers!
if header == false then
i = 1
i = startLine
for j = 1, #headerField do
headerField[j] = j
end
@@ -404,7 +422,8 @@ function ftcsv.parse(inputFile, delimiter, options)
end

local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
return output, headerField
local realHeaders = determineRealHeaders(headerField, fieldsToKeep)
return output, realHeaders
end

-- a function that delimits " to "", used by the writer
463 changes: 463 additions & 0 deletions spec/dynamic_features_spec.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,463 @@
local ftcsv = require "ftcsv"

-- Dimensions of the test matrix. Each table maps a label used in the
-- generated test names to the text spliced into the CSV input.

-- optional UTF-8 byte order mark (bytes 239, 187, 191) prefixed to the input
local BOM = {
    ["NO BOM"] = "",
    ["BOM"] = "\239\187\191",
}
-- line separator placed between CSV lines
local newlines = {
    LF = "\n",
    CRLF = "\r\n",
    CR = "\r",
}
-- whether the input ends without ("NONE") or with ("NEWLINE") a trailing line separator
local endlines = {"NONE", "NEWLINE"}
-- character substituted for the backtick placeholders around each field
local quotes = {
    ["NO QUOTES"] = "",
    ["DOUBLE QUOTES"] = "\"",
}

-- Exercises ftcsv.parse over every combination of BOM, line separator,
-- quoting style and trailing-newline choice. Each scenario is described
-- once as data in `cases`; `registerCase` expands it into one busted test
-- per combination, so the loop/input/assert boilerplate lives in one place.
describe("csv features", function()
    -- Assembles the CSV text for one combination.
    --   lines:        CSV lines; backticks mark where quote characters go
    --   bomBytes:     text prefixed to the input ("" or the BOM bytes)
    --   newlineChars: separator placed between lines
    --   quoteChar:    replacement for the backtick placeholders, or nil
    --                 when the lines already contain literal quotes
    --   endline:      "NONE" for no trailing separator, otherwise the
    --                 input ends with newlineChars
    local function buildInput(lines, bomBytes, newlineChars, quoteChar, endline)
        local body = table.concat(lines, newlineChars)
        if quoteChar ~= nil then
            -- single assignment target: gsub's match count is discarded
            body = body:gsub("`", quoteChar)
        end
        local terminator = ""
        if endline ~= "NONE" then
            terminator = newlineChars
        end
        return bomBytes .. body .. terminator
    end

    -- Registers a single `it` that parses the generated input and checks
    -- both the parsed rows and the returned header list.
    local function registerTest(case, label, bomBytes, newlineChars, quoteChar, endline)
        it(label, function()
            local input = buildInput(case.lines, bomBytes, newlineChars, quoteChar, endline)
            -- options are produced fresh for every test so no table is
            -- shared between parse calls
            local actual, actualHeaders = ftcsv.parse(input, ",", case.options())
            assert.are.same(case.expected, actual)
            assert.are.same(case.expectedHeaders, actualHeaders)
        end)
    end

    -- Expands one case into a test per BOM/newline/(quote)/endline
    -- combination. Cases flagged `literalQuotes` skip the quote dimension
    -- and therefore have one fewer component in their test name.
    local function registerCase(case)
        for bomName, bomBytes in pairs(BOM) do
            for newlineName, newlineChars in pairs(newlines) do
                if case.literalQuotes then
                    for _, endline in ipairs(endlines) do
                        local label = ("%s (%s + %s) EOF: %s"):format(
                            case.name, bomName, newlineName, endline)
                        registerTest(case, label, bomBytes, newlineChars, nil, endline)
                    end
                else
                    for quoteName, quoteChar in pairs(quotes) do
                        for _, endline in ipairs(endlines) do
                            local label = ("%s (%s + %s + %s) EOF: %s"):format(
                                case.name, bomName, newlineName, quoteName, endline)
                            registerTest(case, label, bomBytes, newlineChars, quoteChar, endline)
                        end
                    end
                end
            end
        end
    end

    local headerLines = {"`a`,`b`,`c`", "`apple`,`banana`,`carrot`"}
    local twoDataLines = {"`apple`,`banana`,`carrot`", "`diamond`,`emerald`,`pearl`"}

    local cases = {
        {
            name = "should handle loading from string",
            lines = headerLines,
            options = function() return {loadFromString=true} end,
            expectedHeaders = {"a", "b", "c"},
            expected = {{a = "apple", b = "banana", c = "carrot"}},
        },
        {
            name = "should handle renaming fields",
            lines = headerLines,
            options = function()
                return {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "f"}}
            end,
            expectedHeaders = {"d", "e", "f"},
            expected = {{d = "apple", e = "banana", f = "carrot"}},
        },
        {
            name = "should handle renaming fields to the same out value",
            lines = headerLines,
            options = function()
                return {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "e"}}
            end,
            expectedHeaders = {"d", "e"},
            expected = {{d = "apple", e = "carrot"}},
        },
        {
            name = "should handle keeping only a few fields",
            lines = headerLines,
            options = function()
                return {loadFromString=true, fieldsToKeep={"a", "b"}}
            end,
            expectedHeaders = {"a", "b"},
            expected = {{a = "apple", b = "banana"}},
        },
        {
            name = "should handle only keeping a few fields with a rename to an existing field",
            lines = headerLines,
            options = function()
                return {loadFromString=true, rename={["c"] = "b"}, fieldsToKeep={"a","b"}}
            end,
            expectedHeaders = {"a", "b"},
            expected = {{a = "apple", b = "carrot"}},
        },
        {
            name = "should handle only keeping a few fields with a rename to a new field",
            lines = headerLines,
            options = function()
                return {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"a","f"}}
            end,
            expectedHeaders = {"a", "f"},
            expected = {{a = "apple", f = "carrot"}},
        },
        {
            name = "should apply a function via headerFunc",
            lines = headerLines,
            options = function()
                return {loadFromString=true, headerFunc=string.upper}
            end,
            expectedHeaders = {"A", "B", "C"},
            expected = {{A = "apple", B = "banana", C = "carrot"}},
        },
        {
            name = "should apply a function via headerFunc with rename and fieldsToKeep",
            lines = headerLines,
            options = function()
                return {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"A","F"}, headerFunc=string.upper}
            end,
            expectedHeaders = {"A", "F"},
            expected = {{A = "apple", F = "carrot"}},
        },
        {
            name = "should handle escaped doublequotes",
            -- quotes are written literally here, so there is no quote dimension
            literalQuotes = true,
            lines = {'"a","b","c"', '"""apple""","""banana""","""carrot"""'},
            options = function() return {loadFromString=true} end,
            expectedHeaders = {"a", "b", "c"},
            expected = {{a = '"apple"', b = '"banana"', c = '"carrot"'}},
        },

        -- HEADERLESS TESTS START HERE

        {
            name = "should handle files without headers",
            lines = twoDataLines,
            options = function() return {loadFromString=true, headers=false} end,
            expectedHeaders = {1, 2, 3},
            expected = {
                {"apple", "banana", "carrot"},
                {"diamond", "emerald", "pearl"},
            },
        },
        {
            name = "should handle files without headers and with one row",
            lines = {"`apple`,`banana`,`carrot`"},
            options = function() return {loadFromString=true, headers=false} end,
            expectedHeaders = {1, 2, 3},
            expected = {{"apple", "banana", "carrot"}},
        },
        {
            name = "should handle renaming fields from files without headers",
            lines = twoDataLines,
            options = function()
                return {loadFromString=true, headers=false, rename={"a","b","c"}}
            end,
            expectedHeaders = {"a", "b", "c"},
            expected = {
                {a = "apple", b = "banana", c = "carrot"},
                {a = "diamond", b = "emerald", c = "pearl"},
            },
        },
        {
            name = "should handle renaming fields from files without headers and only keeping a few fields",
            lines = twoDataLines,
            options = function()
                return {loadFromString=true, headers=false, rename={"a","b","c"}, fieldsToKeep={"a","b"}}
            end,
            expectedHeaders = {"a", "b"},
            expected = {
                {a = "apple", b = "banana"},
                {a = "diamond", b = "emerald"},
            },
        },
        {
            name = "should handle if the number of renames doesn't equal the number of fields",
            lines = twoDataLines,
            options = function()
                return {loadFromString=true, headers=false, rename={"a","b"}, fieldsToKeep={"a","b"}}
            end,
            expectedHeaders = {"a", "b"},
            expected = {
                {a = "apple", b = "banana"},
                {a = "diamond", b = "emerald"},
            },
        },
    }

    for _, case in ipairs(cases) do
        registerCase(case)
    end
end)