From 73f86c41d4a9c10c308668a511c7955f71e4eaae Mon Sep 17 00:00:00 2001 From: Abe Varghese Date: Tue, 23 Jul 2024 10:46:39 +0530 Subject: [PATCH] [native] Improvement in parsing data --- .gitignore | 4 ++ .../ContainerQueryRunnerUtils.java | 38 ++++++------------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 2caab436965ed..828f219092375 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,7 @@ a.out *.pb.h *.pb.cc *_pb2.py + +# Compiled executables used for docker build +/docker/presto-cli-*-executable.jar +/docker/presto-server-*.tar.gz diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunnerUtils.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunnerUtils.java index 89f9c00cbe3e6..7ed27c116a4b6 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunnerUtils.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunnerUtils.java @@ -30,7 +30,6 @@ import java.io.OutputStreamWriter; import java.io.StringReader; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties; @@ -226,49 +225,39 @@ public static void createScriptFile(String filePath, String scriptContent) public static MaterializedResult toMaterializedResult(String csvData) throws IOException { - List columnTypes = new ArrayList<>(); - - // Parse CSV data using OpenCSV - ImmutableList> allRows = getLists(csvData); + List allRows = parseCsvData(csvData); // Infer column types based on the maximum columns found - int maxColumns = allRows.stream().mapToInt(List::size).max().orElse(0); + int maxColumns = allRows.stream().mapToInt(row -> row.length).max().orElse(0); + ImmutableList.Builder columnTypesBuilder = ImmutableList.builder(); for (int i = 0; i < maxColumns; i++) { final int columnIndex = i; - columnTypes.add(inferType(allRows.stream() - .map(row -> columnIndex < row.size() ? row.get(columnIndex) : "") + columnTypesBuilder.add(inferType(allRows.stream() + .map(row -> columnIndex < row.length ? row[columnIndex] : "") .collect(Collectors.toList()))); } + ImmutableList columnTypes = columnTypesBuilder.build(); // Convert all rows to MaterializedRow ImmutableList.Builder rowsBuilder = ImmutableList.builder(); - for (List columns : allRows) { + for (String[] columns : allRows) { ImmutableList.Builder valuesBuilder = ImmutableList.builder(); for (int i = 0; i < columnTypes.size(); i++) { - valuesBuilder.add(i < columns.size() ? convertToType(columns.get(i), columnTypes.get(i)) : null); + valuesBuilder.add(i < columns.length ? convertToType(columns[i], columnTypes.get(i)) : null); } rowsBuilder.add(new MaterializedRow(5, valuesBuilder.build())); } - - ImmutableList materializedRows = rowsBuilder.build(); + ImmutableList rows = rowsBuilder.build(); // Create and return the MaterializedResult - return new MaterializedResult(materializedRows, columnTypes); + return new MaterializedResult(rows, columnTypes); } - private static ImmutableList> getLists(String csvData) + private static List parseCsvData(String csvData) throws IOException { CSVReader reader = new CSVReader(new StringReader(csvData)); - List records = reader.readAll(); - - // Collect all rows as lists of strings - ImmutableList.Builder> allRowsBuilder = ImmutableList.builder(); - for (String[] record : records) { - allRowsBuilder.add(ImmutableList.copyOf(record)); - } - ImmutableList> allRows = allRowsBuilder.build(); - return allRows; + return ImmutableList.copyOf(reader.readAll()); } private static Type inferType(List values) @@ -305,9 +294,6 @@ else if (isBoolean) { private static Object convertToType(String value, Type type) { - if (value.isEmpty()) { - return null; - } if (type.equals(VarcharType.VARCHAR)) { return value; }