From 8bd8fbe065d9970bffeedaed2c4e03c3951b5b4d Mon Sep 17 00:00:00 2001 From: Shakil Thakur Date: Tue, 29 May 2018 19:31:28 -0500 Subject: [PATCH] returned headers should now be correct, BOM fix for headerless files and added dynamic_features_spec (#19) --- ftcsv.lua | 23 +- spec/dynamic_features_spec.lua | 463 +++++++++++++++++++++++++++++++++ 2 files changed, 484 insertions(+), 2 deletions(-) create mode 100644 spec/dynamic_features_spec.lua diff --git a/ftcsv.lua b/ftcsv.lua index ffa5366..1f1038d 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -295,6 +295,24 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, return outResults end +-- determine the real headers as opposed to the header mapping +local function determineRealHeaders(headerField, fieldsToKeep) + local realHeaders = {} + local headerSet = {} + for i = 1, #headerField do + if not headerSet[headerField[i]] then + if fieldsToKeep ~= nil and fieldsToKeep[headerField[i]] then + table.insert(realHeaders, headerField[i]) + headerSet[headerField[i]] = true + elseif fieldsToKeep == nil then + table.insert(realHeaders, headerField[i]) + headerSet[headerField[i]] = true + end + end + end + return realHeaders +end + -- runs the show! function ftcsv.parse(inputFile, delimiter, options) -- delimiter MUST be one character @@ -373,7 +391,7 @@ function ftcsv.parse(inputFile, delimiter, options) -- for files where there aren't headers! if header == false then - i = 1 + i = startLine for j = 1, #headerField do headerField[j] = j end @@ -404,7 +422,8 @@ function ftcsv.parse(inputFile, delimiter, options) end local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep) - return output, headerField + local realHeaders = determineRealHeaders(headerField, fieldsToKeep) + return output, realHeaders end -- a function that delimits " to "", used by the writer diff --git a/spec/dynamic_features_spec.lua b/spec/dynamic_features_spec.lua new file mode 100644 index 0000000..2b4b197 --- /dev/null +++ b/spec/dynamic_features_spec.lua @@ -0,0 +1,463 @@ +local ftcsv = require "ftcsv" + +local BOM = {["NO BOM"] = "", ["BOM"] = string.char(239, 187, 191)} +local newlines = {["LF"] = "\n", ["CRLF"] = "\r\n", ["CR"] = "\r"} +local endlines = {"NONE", "NEWLINE"} +local quotes = {["NO QUOTES"] = "", ["DOUBLE QUOTES"] = '"'} + +describe("csv features", function() + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle loading from string (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b", "c"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "banana" + expected[1].c = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle renaming fields (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"d", "e", "f"} + local expected = {} + expected[1] = {} + expected[1].d = "apple" + expected[1].e = "banana" + expected[1].f = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "f"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle renaming fields to the same out value (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"d", "e"} + local expected = {} + expected[1] = {} + expected[1].d = "apple" + expected[1].e = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, rename={["a"] = "d", ["b"] = "e", ["c"] = "e"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle keeping only a few fields (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "banana" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, fieldsToKeep={"a", "b"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle only keeping a few fields with a rename to an existing field (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, rename={["c"] = "b"}, fieldsToKeep={"a","b"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle only keeping a few fields with a rename to a new field (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "f"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].f = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"a","f"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should apply a function via headerFunc (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"A", "B", "C"} + local expected = {} + expected[1] = {} + expected[1].A = "apple" + expected[1].B = "banana" + expected[1].C = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, headerFunc=string.upper} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should apply a function via headerFunc with rename and fieldsToKeep (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"A", "F"} + local expected = {} + expected[1] = {} + expected[1].A = "apple" + expected[1].F = "carrot" + + local defaultString = "%s`a`,`b`,`c`%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, rename={["c"] = "f"}, fieldsToKeep={"A","F"}, headerFunc=string.upper} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for _, endline in ipairs(endlines) do + local name = "should handle escaped doublequotes (%s + %s) EOF: %s" + it(name:format(bom, newline, endline), function() + local expectedHeaders = {"a", "b", "c"} + local expected = {} + expected[1] = {} + expected[1].a = '"apple"' + expected[1].b = '"banana"' + expected[1].c = '"carrot"' + + local defaultString = '%s"a","b","c"%s"""apple""","""banana""","""carrot"""%s' + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + + -- HEADERLESS TESTS START HERE + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle files without headers (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {1, 2, 3} + local expected = {} + expected[1] = {} + expected[1][1] = "apple" + expected[1][2] = "banana" + expected[1][3] = "carrot" + expected[2] = {} + expected[2][1] = "diamond" + expected[2][2] = "emerald" + expected[2][3] = "pearl" + + local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, headers=false} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle files without headers and with one row (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {1, 2, 3} + local expected = {} + expected[1] = {} + expected[1][1] = "apple" + expected[1][2] = "banana" + expected[1][3] = "carrot" + + local defaultString = "%s`apple`,`banana`,`carrot`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, "") + else + defaultString = defaultString:format(i, j) + end + + local options = {loadFromString=true, headers=false} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle renaming fields from files without headers (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b", "c"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "banana" + expected[1].c = "carrot" + expected[2] = {} + expected[2].a = "diamond" + expected[2].b = "emerald" + expected[2].c = "pearl" + + local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, headers=false, rename={"a","b","c"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle renaming fields from files without headers and only keeping a few fields (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "banana" + expected[2] = {} + expected[2].a = "diamond" + expected[2].b = "emerald" + + local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, headers=false, rename={"a","b","c"}, fieldsToKeep={"a","b"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + + for bom, i in pairs(BOM) do + for newline, j in pairs(newlines) do + for quote, k in pairs(quotes) do + for _, endline in ipairs(endlines) do + local name = "should handle if the number of renames doesn't equal the number of fields (%s + %s + %s) EOF: %s" + it(name:format(bom, newline, quote, endline), function() + local expectedHeaders = {"a", "b"} + local expected = {} + expected[1] = {} + expected[1].a = "apple" + expected[1].b = "banana" + expected[2] = {} + expected[2].a = "diamond" + expected[2].b = "emerald" + + local defaultString = "%s`apple`,`banana`,`carrot`%s`diamond`,`emerald`,`pearl`%s" + defaultString = defaultString:gsub("`", k) + + if endline == "NONE" then + defaultString = defaultString:format(i, j, "") + else + defaultString = defaultString:format(i, j, j) + end + + local options = {loadFromString=true, headers=false, rename={"a","b"}, fieldsToKeep={"a","b"}} + local actual, actualHeaders = ftcsv.parse(defaultString, ",", options) + assert.are.same(expected, actual) + assert.are.same(expectedHeaders, actualHeaders) + end) + end + end + end + end + +end) \ No newline at end of file