From 20e7f35636854f61a7834f967c42d9f3ca4d9634 Mon Sep 17 00:00:00 2001 From: Ara Adkins Date: Mon, 2 Aug 2021 13:00:13 +0100 Subject: [PATCH] Fix a bounds-checking bug in CSV parsing (#1914) --- RELEASES.md | 2 ++ distribution/lib/Standard/Table/0.1.0/src/Io/Csv.enso | 9 +++++++-- .../src/main/java/org/enso/table/format/csv/Parser.java | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/RELEASES.md b/RELEASES.md index a491145b2d06..582a9fc25ca1 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -6,6 +6,8 @@ ([#1906](https://github.com/enso-org/enso/pull/1906)). - Added documentation for the new searcher categories ([#1910](https://github.com/enso-org/enso/pull/1910)). +- Fixed a bug where CSV files with very long lines could not be parsed + ([#1914](https://github.com/enso-org/enso/pull/1914)). # Enso 0.2.17 (2021-07-28) diff --git a/distribution/lib/Standard/Table/0.1.0/src/Io/Csv.enso b/distribution/lib/Standard/Table/0.1.0/src/Io/Csv.enso index 63a218f77252..36b97e37f0ba 100644 --- a/distribution/lib/Standard/Table/0.1.0/src/Io/Csv.enso +++ b/distribution/lib/Standard/Table/0.1.0/src/Io/Csv.enso @@ -64,14 +64,19 @@ from_csv : File.File | Text -> Boolean -> Text -> Table ! Parse_Error from_csv csv has_header=True prefix='C' = parser_inst = Parser.create has_header prefix + handle_error error = case error of + Polyglot_Error err -> Error.throw (Parse_Error err.getMessage) + _ -> Panic.throw error + case csv of Text -> input_stream = ByteArrayInputStream.new csv.utf_8.to_array - Table.Table (parser_inst.parse input_stream) + Panic.recover Table.Table (parser_inst.parse input_stream) . catch handle_error File.File _ -> - csv.with_input_stream [File.Option.Read] stream-> + maybe_err = Panic.recover <| csv.with_input_stream [File.Option.Read] stream-> stream.with_java_stream java_stream-> Table.Table (parser_inst.parse java_stream) + maybe_err.catch handle_error _ -> found_type_name = Meta.get_qualified_type_name csv file_name = Meta.get_qualified_type_name File.File diff --git a/std-bits/table/src/main/java/org/enso/table/format/csv/Parser.java b/std-bits/table/src/main/java/org/enso/table/format/csv/Parser.java index e9d269f1dfff..675eb0dda7b6 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/csv/Parser.java +++ b/std-bits/table/src/main/java/org/enso/table/format/csv/Parser.java @@ -45,6 +45,7 @@ public Table parse(InputStream inputStream) { CsvParserSettings settings = new CsvParserSettings(); settings.setHeaderExtractionEnabled(hasHeader); settings.detectFormatAutomatically(); + settings.setMaxCharsPerColumn(-1); CsvParser parser = new CsvParser(settings); parser.beginParsing(inputStream); StorageBuilder[] builders = null;