From f92740c21991ee62b1a6bc64db317c0dd4689ef1 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 26 Apr 2022 15:42:53 +0100 Subject: [PATCH 01/28] Starting work --- .../org/enso/table/format/xlsx/Range.java | 21 +++++++++++++ .../org/enso/table/format/xlsx/Reader.java | 30 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java new file mode 100644 index 000000000000..f47b9b1fe1fb --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -0,0 +1,21 @@ +package org.enso.table.format.xlsx; + +import java.util.regex.Pattern; + +public class Range { + private static Pattern pattern = new Pattern(""); + + private final String sheetName; + private final int leftColumn; + private final int rightColumn; + private final int topRow; + private final int bottowRow; + + public Range(String rangeAddress) { + + } + + // ^('([^']+)'|([^'!]+))!([A-Z]+\d+|R\d+C\d+)(:([A-Z]+\d+))?$ + // \$?[A-Z]{1,3}\$?\d+ + // (\$?[A-Z]{1,3}\$?\d+)(?::(\$?[A-Z]{1,3}\$?\d+))? 
+} diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index eb95721c54c5..bd7f40dc932d 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -3,6 +3,7 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.*; import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.xssf.usermodel.XSSFName; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; @@ -215,4 +216,33 @@ private static Object getCellValue( } return null; } + + public static String[] SheetNames(InputStream stream) + throws IOException + { + XSSFWorkbook workbook = new XSSFWorkbook(stream); + int sheetCount = workbook.getNumberOfSheets(); + var output = new String[sheetCount]; + for (int i = 0; i < sheetCount; i++) { + output[i] = workbook.getSheetName(i); + } + return output; + } + + public static String[] RangeNames(InputStream stream) + throws IOException + { + XSSFWorkbook workbook = new XSSFWorkbook(stream); + return workbook.getAllNames().stream().map(XSSFName::getNameName).toArray(String[]::new); + } + + public static Table ReadRangeByName(InputStream stream, String nameOrAddress) + throws IOException + { + XSSFWorkbook workbook = new XSSFWorkbook(stream); + + // Sheet!From[:To] + } + + } From 18ca6423a63df3dca001fea40a683752bf5f76e0 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 27 Apr 2022 14:57:50 +0100 Subject: [PATCH 02/28] Range parser --- .../org/enso/table/format/xlsx/Range.java | 165 +++++++++++++++++- .../org/enso/table/format/xlsx/Reader.java | 12 +- 2 files changed, 166 insertions(+), 11 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java 
b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index f47b9b1fe1fb..d9fa74e3c584 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -1,21 +1,174 @@ package org.enso.table.format.xlsx; +import java.util.Optional; +import java.util.function.Function; +import java.util.regex.Matcher; import java.util.regex.Pattern; public class Range { - private static Pattern pattern = new Pattern(""); + private static final Pattern FULL_ADDRESS = Pattern.compile("^('[^']+'|[^'!]+)!(.*)$"); + + private static String[] parseFullAddress(String fullAddress) { + if (fullAddress == null) { + throw new IllegalArgumentException("fullAddress cannot be NULL."); + } + + Matcher matcher = FULL_ADDRESS.matcher(fullAddress); + if (!matcher.find()) { + throw new IllegalArgumentException("'" + fullAddress + "' is not valid format."); + } + + return new String[] {matcher.group(0), matcher.group(1)}; + } + + private static final String ADDRESS_A1 = "\\$?[A-Z]{1,3}\\$?\\d+"; + private static final String ADDRESS_COL = "\\$?[A-Z]{1,3}"; + private static final String ADDRESS_ROW = "\\$?\\d+"; + private static final String ADDRESS_RC = "R(?:\\[\\d+]|\\d+)C(?:\\[\\d+]|\\d+)"; + + private static final Pattern RANGE_A1 = + Pattern.compile("(" + ADDRESS_A1 + ")(?::(" + ADDRESS_A1 + "))?"); + private static final Pattern RANGE_COL = + Pattern.compile("(" + ADDRESS_COL + ")(?::(" + ADDRESS_COL + "))?"); + private static final Pattern RANGE_ROW = + Pattern.compile("(" + ADDRESS_ROW + ")(?::(" + ADDRESS_ROW + "))?"); + private static final Pattern RANGE_RC = + Pattern.compile("(" + ADDRESS_RC + ")(?::(" + ADDRESS_RC + "))?"); + + private static int[] parseRange(String range) { + for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) { + Optional parsed = + parseRange(range, pattern, pattern == RANGE_RC ? 
Range::parseRC : Range::parseA1); + + if (parsed.isPresent()) { + return parsed.get(); + } + } + + throw new IllegalArgumentException("Unsupported range address: " + range); + } + + private static Optional parseRange( + String range, Pattern pattern, Function parser) { + Matcher matcher = pattern.matcher(range.toUpperCase()); + if (!matcher.find()) { + return Optional.empty(); + } + + int[] tl = parser.apply(matcher.group(0)); + if (matcher.group(2) == null) { + return Optional.of(new int[] {tl[0], tl[1], tl[0], tl[1]}); + } + + int[] br = parser.apply(matcher.group(1)); + return Optional.of( + new int[] { + Math.min(tl[0], br[0]), + Math.min(tl[1], br[1]), + Math.max(tl[0], br[0]), + Math.max(tl[1], br[1]) + }); + } + + private static boolean isLetter(char c) { + return c >= 'A' && c <= 'Z'; + } + + private static boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + private static int[] parseA1(CharSequence address) { + int col = 0; + + int index = 0; + while (index < address.length() && isLetter(address.charAt(index))) { + col = 26 * col + (address.charAt(index) - 'A' + 1); + index++; + } + + int row = index < address.length() ? 
Integer.parseInt(address, index, address.length(), 10) : 0; + return new int[] {row, col}; + } + + private static int[] parseRC(CharSequence address) { + int index = 0; + + int row = 0; + if (index < address.length() && address.charAt(index) == 'R') { + // Parse Row + int endIndex = index + 1; + while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { + endIndex++; + } + + if (endIndex == index + 1) { + throw new IllegalArgumentException("R1C1 style addresses must be absolute."); + } + + row = Integer.parseInt(address, index + 1, endIndex, 10); + } + + int col = 0; + if (index < address.length() && address.charAt(index) == 'C') { + // Parse Row + int endIndex = index + 1; + while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { + endIndex++; + } + + if (endIndex == index + 1) { + throw new IllegalArgumentException("R1C1 style addresses must be absolute."); + } + + col = Integer.parseInt(address, index + 1, endIndex, 10); + } + + return new int[] {row, col}; + } private final String sheetName; private final int leftColumn; private final int rightColumn; private final int topRow; - private final int bottowRow; + private final int bottomRow; + + public Range(String fullAddress) { + String[] sheetAndRange = parseFullAddress(fullAddress); + this.sheetName = sheetAndRange[0]; + + int[] range = parseRange(sheetAndRange[1]); + this.leftColumn = range[1]; + this.rightColumn = range[3]; + this.topRow = range[0]; + this.bottomRow = range[2]; + } + + public String getSheetName() { + return sheetName; + } - public Range(String rangeAddress) { + public boolean isWholeRow() { + return leftColumn == 0; + } + + public int getLeftColumn() { + return leftColumn; + } + public int getRightColumn() { + return rightColumn; } - // ^('([^']+)'|([^'!]+))!([A-Z]+\d+|R\d+C\d+)(:([A-Z]+\d+))?$ - // \$?[A-Z]{1,3}\$?\d+ - // (\$?[A-Z]{1,3}\$?\d+)(?::(\$?[A-Z]{1,3}\$?\d+))? 
+ public boolean isWholeColumn() { + return topRow == 0; + } + + public int getTopRow() { + return topRow; + } + + public int getBottomRow() { + return bottomRow; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index bd7f40dc932d..b1e7f969e566 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -1,7 +1,12 @@ package org.enso.table.format.xlsx; import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.ss.usermodel.*; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.xssf.usermodel.XSSFName; import org.apache.poi.xssf.usermodel.XSSFWorkbook; @@ -240,9 +245,6 @@ public static Table ReadRangeByName(InputStream stream, String nameOrAddress) throws IOException { XSSFWorkbook workbook = new XSSFWorkbook(stream); - - // Sheet!From[:To] + return null; } - - } From d0186538e2a5d9d052622e9dc74cedc79898ea5f Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 27 Apr 2022 16:47:18 +0100 Subject: [PATCH 03/28] Range parser --- .../Standard/Table/0.0.0-dev/src/Error.enso | 7 +++++++ .../Table/0.0.0-dev/src/Io/Excel_Section.enso | 13 ++++++++++++ .../Table/0.0.0-dev/src/Io/File_Format.enso | 21 +++++++++++++++++++ .../org/enso/table/format/xlsx/Range.java | 4 ++-- 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso index 3bac2ddeaee1..dc7f8b2ea4dc 100644 --- 
a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso @@ -111,3 +111,10 @@ type Mismatched_Quote ## Indicates an unexpected parser error. type Parser_Error cause + +## Indicates that a specified location was not valid. +type Invalid_Location (location:Text) + +Invalid_Location.to_display_text : Text +Invalid_Location.to_display_text = + "The location '"+this.location+"' is not valid." \ No newline at end of file diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso new file mode 100644 index 000000000000..a4ccb548df35 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso @@ -0,0 +1,13 @@ +from Standard.Base import all + +## Gets a list of sheets within a workbook +type Sheet_Names + +## Gets a list of named ranges within a workbook +type Range_Names + +## Gets the data from a specific sheet. Column names are the Excel column names. 
+type Sheet (sheet:Integer|Text) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) + +## Gets a specific range (taking either a defined name or external style address) from the workbook +type Range (address:Text) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) \ No newline at end of file diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index 4079049b679d..b05632a4ce9b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -6,6 +6,12 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding import Standard.Table.Internal.Delimited_Reader +from Standard.Table.Error import Invalid_Location +import Standard.Table.Io.Excel_Section + +polyglot java import org.enso.table.format.xlsx.Range +polyglot java import java.lang.IllegalArgumentException + ## This type needs to be here to allow for the usage of Standard.Table functions. Ideally, it would be an interface within Standard.Base and expanded by additional implementations in Standard.Table. @@ -112,3 +118,18 @@ type Delimited ## A setting to infer the default behaviour of some option. 
type Infer + +## Read the file to a `Table` from an Excel file +type Excel + type Excel (section:Excel_Section=Excel_Section.Sheet_Names) + + parse_address : String -> Pair String Vector + parse_address address = + parsed = Panic.catch IllegalArgumentException (Range.new address) _-> + Error.throw (Invalid_Location address) + Pair parsed.getSheetName [parsed.getTopRow, parsed.getLeftColumn, parsed.getBottomRow, parsed.getRightColumn] + + ## Implements the `File.read` for this `File_Format` + read : File -> Problem_Behavior -> Any + read file on_problems = + Panic.throw "To Do" diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index d9fa74e3c584..b5158821d007 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -6,7 +6,7 @@ import java.util.regex.Pattern; public class Range { - private static final Pattern FULL_ADDRESS = Pattern.compile("^('[^']+'|[^'!]+)!(.*)$"); + private static final Pattern FULL_ADDRESS = Pattern.compile("^('[^']+'|[^'!]+)!(.+)$"); private static String[] parseFullAddress(String fullAddress) { if (fullAddress == null) { @@ -18,7 +18,7 @@ private static String[] parseFullAddress(String fullAddress) { throw new IllegalArgumentException("'" + fullAddress + "' is not valid format."); } - return new String[] {matcher.group(0), matcher.group(1)}; + return new String[] {matcher.group(1), matcher.group(2)}; } private static final String ADDRESS_A1 = "\\$?[A-Z]{1,3}\\$?\\d+"; From 1ece00abd8cc601148cef79d68c8cdcdaa8d6395 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 27 Apr 2022 17:55:51 +0100 Subject: [PATCH 04/28] Range parser --- .../lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso | 1 + .../table/src/main/java/org/enso/table/format/xlsx/Range.java | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index b05632a4ce9b..9773405ca442 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -123,6 +123,7 @@ type Infer type Excel type Excel (section:Excel_Section=Excel_Section.Sheet_Names) + ## Given a string parse into an Excel Range parse_address : String -> Pair String Vector parse_address address = parsed = Panic.catch IllegalArgumentException (Range.new address) _-> diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index b5158821d007..731399ffa89f 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -55,12 +55,12 @@ private static Optional parseRange( return Optional.empty(); } - int[] tl = parser.apply(matcher.group(0)); + int[] tl = parser.apply(matcher.group(1)); if (matcher.group(2) == null) { return Optional.of(new int[] {tl[0], tl[1], tl[0], tl[1]}); } - int[] br = parser.apply(matcher.group(1)); + int[] br = parser.apply(matcher.group(2)); return Optional.of( new int[] { Math.min(tl[0], br[0]), From a2ff0b2cb403811a7865734d08dc1b16cdb13b69 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 28 Apr 2022 17:44:33 +0100 Subject: [PATCH 05/28] Close to functional --- .../Table/0.0.0-dev/src/Io/Excel_Section.enso | 4 +- .../Table/0.0.0-dev/src/Io/File_Format.enso | 17 +- .../Table/0.0.0-dev/src/Io/File_Read.enso | 1 + .../org/enso/table/format/xlsx/Reader.java | 163 +++++++++++++++--- 4 files changed, 159 insertions(+), 26 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso index a4ccb548df35..681b599ddda3 100644 
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso @@ -7,7 +7,7 @@ type Sheet_Names type Range_Names ## Gets the data from a specific sheet. Column names are the Excel column names. -type Sheet (sheet:Integer|Text) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) +type Sheet (sheet:Integer|Text) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) ## Gets a specific range (taking either a defined name or external style address) from the workbook -type Range (address:Text) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) \ No newline at end of file +type Range (address:Text) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) \ No newline at end of file diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index 9773405ca442..a963ba5bb62c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -7,11 +7,16 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding import Standard.Table.Internal.Delimited_Reader from Standard.Table.Error import Invalid_Location + +import Standard.Table.Data.Table import Standard.Table.Io.Excel_Section +polyglot java import org.enso.table.format.xlsx.Reader polyglot java import org.enso.table.format.xlsx.Range polyglot java import java.lang.IllegalArgumentException +polyglot java import java.io.IOException + ## This type needs to be here to allow for the usage of Standard.Table functions. Ideally, it would be an interface within Standard.Base and expanded by additional implementations in Standard.Table. 
@@ -133,4 +138,14 @@ type Excel ## Implements the `File.read` for this `File_Format` read : File -> Problem_Behavior -> Any read file on_problems = - Panic.throw "To Do" + reader stream = case this.section of + Excel_Section.Sheet_Names -> Vector.Vector (Reader.readSheetNames stream) + Excel_Section.Range_Names -> Vector.Vector (Reader.readRangeNames stream) + Excel_Section.Sheet sheet skip_rows row_limit -> + Table.Table <| + if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit) else + Reader.readSheetByName stream sheet skip_rows row_limit + Excel_Section.Range address skip_rows row_limit -> Table.Table (Reader.readRange stream address skip_rows row_limit) + + file.with_input_stream [File.Option.Read] stream-> + stream.with_java_stream reader diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso index b3d258dbd3f0..b6f87b8c0d7f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso @@ -1,5 +1,6 @@ from Standard.Base import all from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning + import Standard.Table.Io.File_Format ## ALIAS Read Text File, Read File diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index b1e7f969e566..03beeb0d00da 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -2,13 +2,14 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.Name; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Cell; import 
org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.util.CellRangeAddress; -import org.apache.poi.xssf.usermodel.XSSFName; +import org.apache.poi.ss.util.CellReference; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; @@ -23,7 +24,9 @@ import java.util.ArrayList; import java.util.List; import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; /** A table reader for MS Excel files. */ public class Reader { @@ -182,7 +185,11 @@ private static Table read_table( if (cell == null) { builders.get(j - minCol).append(null); } else { - builders.get(j - minCol).append(getCellValue(cell, mkDate)); + Object value = getCellValue(cell); + if (value instanceof LocalDate) { + value = mkDate.apply((LocalDate) value); + } + builders.get(j - minCol).append(getCellValue(cell)); } } } @@ -193,38 +200,108 @@ private static Table read_table( return new Table(columns); } - private static Object getCellValue(Cell cell, Function mkDate) { - return getCellValue(cell, cell.getCellType(), mkDate); + private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, int rowCount) { + // Row Range + int firstRow = sheet.getFirstRowNum() + 1; + int lastRow = sheet.getLastRowNum() + 1; + int startRow = (range == null || range.isWholeColumn() ? 1 : range.getTopRow()) + skipRows; + int endRow = + Math.min( + range == null || range.isWholeColumn() ? lastRow : range.getBottomRow(), + startRow + rowCount - 1); + + // Columns + int startCol = (range == null || range.isWholeRow() ? 1 : range.getLeftColumn()); + int endCol = (range == null || range.isWholeRow() ? -1 : range.getRightColumn()); + List builders = + endCol == -1 + ? 
new ArrayList<>() + : IntStream.range(startCol, endCol + 1) + .mapToObj(i -> new InferredBuilder(endRow - startRow + 1)) + .collect(Collectors.toList()); + + // Read Cell Data + for (int row = startRow; row <= endRow; row++) { + if (row < firstRow || row > lastRow) { + builders.forEach(b -> b.append(null)); + } else { + Row currentRow = sheet.getRow(row - 1); + + int currentEndCol = endCol == -1 ? currentRow.getLastCellNum() + 1 : endCol; + for (int i = builders.size(); i <= currentEndCol - startCol; i++) { + Builder builder = new InferredBuilder(endRow - startRow + 1); + builder.appendNulls(row - startRow); + builders.add(builder); + } + + int firstCol = currentRow.getFirstCellNum() + 1; + int lastCol = currentRow.getLastCellNum(); + for (int col = startCol; col <= currentEndCol; col++) { + Object value = + col < firstCol || col > lastCol ? null : getCellValue(currentRow.getCell(col - 1)); + builders.get(col - startCol).append(value); + } + } + } + + // Create Table + Column[] columns = + IntStream.range(0, builders.size()) + .mapToObj( + idx -> + new Column( + CellReference.convertNumToColString(startCol + idx), + builders.get(idx).seal())) + .toArray(Column[]::new); + + return new Table(columns); + } + + private static String getRefersTo(Workbook workbook, String rangeName) { + for (Name name : workbook.getAllNames()) { + if (name.getNameName().equalsIgnoreCase(rangeName)) { + return name.getRefersToFormula(); + } + } + return null; + } + + private static int getSheetIndex(Workbook workbook, String sheetName) { + int sheetCount = workbook.getNumberOfSheets(); + for (int i = 0; i < sheetCount; i++) { + if (workbook.getSheetName(i).equalsIgnoreCase(sheetName)) { + return i; + } + } + return -1; } - private static Object getCellValue( - Cell cell, CellType cellType, Function mkDate) { + private static Object getCellValue(Cell cell) { + CellType cellType = cell.getCellType(); + if (cellType == CellType.FORMULA) { + cellType = cell.getCachedFormulaResultType(); + } + 
switch (cellType) { - case FORMULA: - return getCellValue(cell, cell.getCachedFormulaResultType(), mkDate); case NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { - return mkDate.apply(cell.getLocalDateTimeCellValue().toLocalDate()); + return cell.getLocalDateTimeCellValue().toLocalDate(); } else { return cell.getNumericCellValue(); } case STRING: return cell.getStringCellValue(); - case ERROR: - return null; case BOOLEAN: return cell.getBooleanCellValue(); + case ERROR: case BLANK: - return null; case _NONE: return null; } return null; } - public static String[] SheetNames(InputStream stream) - throws IOException - { + public static String[] readSheetNames(InputStream stream) throws IOException { XSSFWorkbook workbook = new XSSFWorkbook(stream); int sheetCount = workbook.getNumberOfSheets(); var output = new String[sheetCount]; @@ -234,17 +311,57 @@ public static String[] SheetNames(InputStream stream) return output; } - public static String[] RangeNames(InputStream stream) - throws IOException - { + public static String[] readRangeNames(InputStream stream) throws IOException { + Workbook workbook = new XSSFWorkbook(stream); + return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new); + } + + public static Table readSheetByName( + InputStream stream, String sheetName, Integer skip_rows, Integer row_limit) + throws IOException, IllegalArgumentException { + Workbook workbook = new XSSFWorkbook(stream); + + int sheetIndex = getSheetIndex(workbook, sheetName); + if (sheetIndex == -1) { + throw new IllegalArgumentException("Unknown sheet '" + sheetName + "'."); + } + + Sheet sheet = workbook.getSheetAt(sheetIndex); + return readSheetToTable(sheet, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? 
Integer.MAX_VALUE : row_limit); + } + + public static Table readSheetByIndex( + InputStream stream, int index, Integer skip_rows, Integer row_limit) + throws IOException, IllegalArgumentException { XSSFWorkbook workbook = new XSSFWorkbook(stream); - return workbook.getAllNames().stream().map(XSSFName::getNameName).toArray(String[]::new); + + int sheetCount = workbook.getNumberOfSheets(); + if (index < 1 || index > sheetCount) { + throw new IllegalArgumentException( + "Sheet index is not in valid range (1 to " + sheetCount + " inclusive)."); + } + + Sheet sheet = workbook.getSheetAt(index - 1); + return readSheetToTable(sheet, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); } - public static Table ReadRangeByName(InputStream stream, String nameOrAddress) - throws IOException - { + public static Table readRange( + InputStream stream, String nameOrAddress, Integer skip_rows, Integer row_limit) + throws IOException { XSSFWorkbook workbook = new XSSFWorkbook(stream); - return null; + + String refersTo = getRefersTo(workbook, nameOrAddress); + if (refersTo == null) { + refersTo = nameOrAddress; + } + Range range = new Range(refersTo); + + int sheetIndex = getSheetIndex(workbook, range.getSheetName()); + if (sheetIndex == -1) { + throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'."); + } + + Sheet sheet = workbook.getSheetAt(sheetIndex); + return readSheetToTable(sheet, range, skip_rows == null ? 0 : skip_rows, row_limit == null ? 
Integer.MAX_VALUE : row_limit); } } From beb0f42a131db34c999cee5d5cdaa5d0f3c93532 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 29 Apr 2022 13:32:06 +0100 Subject: [PATCH 06/28] Restructure and add Range type --- .../Table/0.0.0-dev/src/Io/Excel.enso | 94 +++++++++++++++++++ .../Table/0.0.0-dev/src/Io/Excel_Section.enso | 13 --- .../Table/0.0.0-dev/src/Io/File_Format.enso | 3 +- .../Standard/Table/0.0.0-dev/src/Main.enso | 4 + .../org/enso/table/format/xlsx/Range.java | 48 +++++++++- 5 files changed, 145 insertions(+), 17 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso delete mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso new file mode 100644 index 000000000000..5c27b1954aad --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -0,0 +1,94 @@ +from Standard.Base import all + +polyglot java import org.enso.table.format.xlsx.Range as Java_Range +polyglot java import java.lang.IllegalArgumentException + +## Specified the part of an Excel Workbook to Read +type Excel_Section + ## Gets a list of sheets within a workbook + type Sheet_Names + + ## Gets a list of named ranges within a workbook + type Range_Names + + ## Gets the data from a specific sheet. Column names are the Excel column names. + type Sheet (sheet:(Integer|Text)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) + + ## Gets a specific range (taking either a defined name or external style address) from the workbook + type Range (address:(Text|Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) + +## Specifies a range within Excel and provides various ways of +type Range + type Range java_range:Java_Range + + ## Gets the address to this in A1 format. 
+ address : Text + address = this.java_range.getAddress + + ## Validates if a column index (1-based) is within the valid range for Excel. + is_valid_column : Integer -> Integer -> Boolean + is_valid_column column (limit=16384) = (column > 0) && (column <= limit) + + ## Validates if a row index (1-based) is within the valid range for Excel. + is_valid_row : Integer -> Integer -> Boolean + is_valid_row row (limit=1048576) = (row > 0) && (row <= limit) + + ## Given a column name parse to the index (1-based) or return index unchanged. + column_index : (Text|Integer) -> Integer + column_index column : + if column.is_an Integer then column else Java_Range.parseA1Column column + + ## Create a Range from an address + from_address : Text -> Range + from_address address : + Panic.catch IllegalArgumentException (Range (Java_Range.new address)) caught_panic-> + Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage) + + ## Create a Range for a single cell. + for_cell : Text -> (Text|Integer) -> Integer -> Range + for_cell sheet column row = + col_index = Range.column_index column + + col_valid = here.validate (Range.is_valid_column col_index) ("Invalid column for Excel: " + column + ".") _ + all_valid = here.validate (Range.is_valid_row bottom) ("Invalid row for Excel: " + row + ".") (col_valid _) + + all_valid <| Range (Java_Range.new sheet col_index row col_index_row) + + ## Create a Range for a range of cells. 
+ for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Range + for_range sheet left top right bottom = + left_index = column_index left + right_index = column_index right + + left_valid = here.validate (Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ + right_valid = here.validate (Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) + top_valid = here.validate (Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") (right_valid _) + all_valid = here.validate (Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + + all_valid <| Range (Java_Range.new sheet left_index top right_index bottom) + + ## Create a Range for a set of columns. + for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Range + for_columns sheet left (right=left) = + left_index = column_index left + right_index = column_index right + + left_valid = here.validate (Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ + all_valid = here.validate (Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) + + all_valid <| Range (Java_Range.new sheet left_index 0 right_index 0) + + ## Create a Range for a set of rows. 
+ for_rows : Text -> Integer -> Integer -> Range + for_rows sheet top (bottom=top) = + top_valid = here.validate (Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") _ + all_valid = here.validate (Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + + all_valid <| Range (Java_Range.new sheet 0 top 0 bottom) + + +## PRIVATE + Wrapper for validation +validate : Boolean -> Text -> Any +validate ~validation ~error_message ~wrapped = + if validation then wrapper else Error.throw (Illegal_Argument_Error error_message) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso deleted file mode 100644 index 681b599ddda3..000000000000 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel_Section.enso +++ /dev/null @@ -1,13 +0,0 @@ -from Standard.Base import all - -## Gets a list of sheets within a workbook -type Sheet_Names - -## Gets a list of named ranges within a workbook -type Range_Names - -## Gets the data from a specific sheet. Column names are the Excel column names. 
-type Sheet (sheet:Integer|Text) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) - -## Gets a specific range (taking either a defined name or external style address) from the workbook -type Range (address:Text) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) \ No newline at end of file diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index a963ba5bb62c..33d5e4ac43f0 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -9,10 +9,9 @@ import Standard.Table.Internal.Delimited_Reader from Standard.Table.Error import Invalid_Location import Standard.Table.Data.Table -import Standard.Table.Io.Excel_Section +from Standard.Table.Io.Excel import Excel_Section polyglot java import org.enso.table.format.xlsx.Reader -polyglot java import org.enso.table.format.xlsx.Range polyglot java import java.lang.IllegalArgumentException polyglot java import java.io.IOException diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index dda566a09c91..29de6f6a3b51 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -3,6 +3,8 @@ from Standard.Base import all import Standard.Geo.Geo_Json import Standard.Table.Io.Csv import Standard.Table.Io.Format +import Standard.Table.Io.File_Read +import Standard.Table.Io.Excel import Standard.Table.Io.Spreadsheet import Standard.Table.Io.Spreadsheet_Write_Mode import Standard.Table.Data.Table @@ -11,12 +13,14 @@ import Standard.Table.Data.Order_Rule import Standard.Table.Model from Standard.Table.Io.Csv export all hiding Parser +from Standard.Table.Io.Excel export all from Standard.Table.Io.Spreadsheet export all hiding Reader export 
Standard.Table.Io.Format export Standard.Table.Io.Spreadsheet_Write_Mode export Standard.Table.Data.Column export Standard.Table.Model +export Standard.Table.Io.File_Read from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table from Standard.Table.Data.Order_Rule export Order_Rule diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index 731399ffa89f..e6ef065a5107 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -1,12 +1,14 @@ package org.enso.table.format.xlsx; +import org.apache.poi.ss.util.CellReference; + import java.util.Optional; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Range { - private static final Pattern FULL_ADDRESS = Pattern.compile("^('[^']+'|[^'!]+)!(.+)$"); + private static final Pattern FULL_ADDRESS = Pattern.compile("^('[.*]+'|[^'!]+)!(.+)$"); private static String[] parseFullAddress(String fullAddress) { if (fullAddress == null) { @@ -127,6 +129,22 @@ private static int[] parseRC(CharSequence address) { return new int[] {row, col}; } + public static int parseA1Column(CharSequence column) { + int col = 0; + + int index = 0; + while (index < column.length() && isLetter(column.charAt(index))) { + col = 26 * col + (column.charAt(index) - 'A' + 1); + index++; + } + + if (index != column.length()) { + return -1; + } + + return col; + } + private final String sheetName; private final int leftColumn; private final int rightColumn; @@ -135,7 +153,7 @@ private static int[] parseRC(CharSequence address) { public Range(String fullAddress) { String[] sheetAndRange = parseFullAddress(fullAddress); - this.sheetName = sheetAndRange[0]; + this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); int[] range = 
parseRange(sheetAndRange[1]); this.leftColumn = range[1]; @@ -144,6 +162,14 @@ public Range(String fullAddress) { this.bottomRow = range[2]; } + public Range(String sheetName, int leftColumn, int topRow, int rightColumn, int bottomRow) { + this.sheetName = sheetName; + this.leftColumn = leftColumn; + this.topRow = topRow; + this.rightColumn = rightColumn; + this.bottomRow = bottomRow; + } + public String getSheetName() { return sheetName; } @@ -171,4 +197,22 @@ public int getTopRow() { public int getBottomRow() { return bottomRow; } + + public String getAddress() { + String sheetNameEscaped = getSheetName(); + if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) { + sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'"; + } + + String range = + (isWholeColumn() ? "" : CellReference.convertNumToColString(getLeftColumn())) + + (isWholeRow() ? "" : Integer.toString(getTopRow())); + if (getLeftColumn() != getRightColumn() || getTopRow() != getBottomRow()) { + range += (isWholeColumn() ? "" : CellReference.convertNumToColString(getRightColumn())) + + (isWholeRow() ? "" : Integer.toString(getBottomRow())); + + } + + return sheetNameEscaped + "!" 
+ range; + } } From 70b43cbd39d6211d70f77063d38ef96a6dfd3fa2 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 29 Apr 2022 16:57:27 +0100 Subject: [PATCH 07/28] Fix issues with the Excel_Range type --- .../Table/0.0.0-dev/src/Io/Excel.enso | 93 ++++++++++++------- .../Table/0.0.0-dev/src/Io/File_Format.enso | 31 +------ .../Standard/Table/0.0.0-dev/src/Main.enso | 2 +- .../org/enso/table/format/xlsx/Range.java | 19 ++-- 4 files changed, 73 insertions(+), 72 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 5c27b1954aad..122ec023e356 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -1,8 +1,13 @@ -from Standard.Base import all +from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Error, Any, Error, Panic, File, Vector +from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior + +import Standard.Table.Data.Table polyglot java import org.enso.table.format.xlsx.Range as Java_Range +polyglot java import org.enso.table.format.xlsx.Reader polyglot java import java.lang.IllegalArgumentException + ## Specified the part of an Excel Workbook to Read type Excel_Section ## Gets a list of sheets within a workbook @@ -15,16 +20,21 @@ type Excel_Section type Sheet (sheet:(Integer|Text)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) ## Gets a specific range (taking either a defined name or external style address) from the workbook - type Range (address:(Text|Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) + type Range (address:(Text|Excel_Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) + ## Specifies a range within Excel and provides various ways of -type Range - type Range java_range:Java_Range +type Excel_Range + type 
Excel_Range java_range:Java_Range ## Gets the address to this in A1 format. address : Text address = this.java_range.getAddress + ## Display the Excel_Range + to_text : Text + to_text = "Excel_Range " + this.address + ## Validates if a column index (1-based) is within the valid range for Excel. is_valid_column : Integer -> Integer -> Boolean is_valid_column column (limit=16384) = (column > 0) && (column <= limit) @@ -35,60 +45,75 @@ type Range ## Given a column name parse to the index (1-based) or return index unchanged. column_index : (Text|Integer) -> Integer - column_index column : + column_index column = if column.is_an Integer then column else Java_Range.parseA1Column column ## Create a Range from an address - from_address : Text -> Range - from_address address : - Panic.catch IllegalArgumentException (Range (Java_Range.new address)) caught_panic-> + from_address : Text -> Excel_Range + from_address address = + Panic.catch IllegalArgumentException (Excel_Range (Java_Range.new address)) caught_panic-> Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage) ## Create a Range for a single cell. 
- for_cell : Text -> (Text|Integer) -> Integer -> Range + for_cell : Text -> (Text|Integer) -> Integer -> Excel_Range for_cell sheet column row = - col_index = Range.column_index column + col_index = Excel_Range.column_index column - col_valid = here.validate (Range.is_valid_column col_index) ("Invalid column for Excel: " + column + ".") _ - all_valid = here.validate (Range.is_valid_row bottom) ("Invalid row for Excel: " + row + ".") (col_valid _) + col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column + ".") _ + all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row + ".") (col_valid _) - all_valid <| Range (Java_Range.new sheet col_index row col_index_row) + all_valid <| Excel_Range (Java_Range.new sheet col_index row col_index row) ## Create a Range for a range of cells. - for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Range + for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range for_range sheet left top right bottom = - left_index = column_index left - right_index = column_index right + left_index = Excel_Range.column_index left + right_index = Excel_Range.column_index right - left_valid = here.validate (Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ - right_valid = here.validate (Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) - top_valid = here.validate (Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") (right_valid _) - all_valid = here.validate (Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) + top_valid 
= here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") (right_valid _) + all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) - all_valid <| Range (Java_Range.new sheet left_index top right_index bottom) + all_valid <| Excel_Range (Java_Range.new sheet left_index top right_index bottom) - ## Create a Range for a set of columns. - for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Range + ## Create an Excel_Range for a set of columns. + for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range for_columns sheet left (right=left) = - left_index = column_index left - right_index = column_index right + left_index = Excel_Range.column_index left + right_index = Excel_Range.column_index right - left_valid = here.validate (Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ - all_valid = here.validate (Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ + all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) - all_valid <| Range (Java_Range.new sheet left_index 0 right_index 0) + all_valid <| Excel_Range (Java_Range.new sheet left_index 0 right_index 0) - ## Create a Range for a set of rows. - for_rows : Text -> Integer -> Integer -> Range + ## Create an Excel_Range for a set of rows. 
+ for_rows : Text -> Integer -> Integer -> Excel_Range for_rows sheet top (bottom=top) = - top_valid = here.validate (Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") _ - all_valid = here.validate (Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") _ + all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) - all_valid <| Range (Java_Range.new sheet 0 top 0 bottom) + all_valid <| Excel_Range (Java_Range.new sheet 0 top 0 bottom) ## PRIVATE Wrapper for validation validate : Boolean -> Text -> Any validate ~validation ~error_message ~wrapped = - if validation then wrapper else Error.throw (Illegal_Argument_Error error_message) + if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) + + +read_excel : File -> Excel_Section -> Problem_Behavior -> (Table | Vector) +read_excel file section on_problems = + reader stream = case section of + Sheet_Names -> Vector.Vector (Reader.readSheetNames stream) + Range_Names -> Vector.Vector (Reader.readRangeNames stream) + Sheet sheet skip_rows row_limit -> + Table.Table <| + if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit) else + Reader.readSheetByName stream sheet skip_rows row_limit + Range address skip_rows row_limit -> Table.Table (Reader.readRange stream address skip_rows row_limit) + + file.with_input_stream [File.Option.Read] stream-> + stream.with_java_stream reader diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index 33d5e4ac43f0..80d9738f849c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -6,15 +6,7 @@ from 
Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding import Standard.Table.Internal.Delimited_Reader -from Standard.Table.Error import Invalid_Location - -import Standard.Table.Data.Table -from Standard.Table.Io.Excel import Excel_Section - -polyglot java import org.enso.table.format.xlsx.Reader -polyglot java import java.lang.IllegalArgumentException - -polyglot java import java.io.IOException +import Standard.Table.Io.Excel as Excel_Module ## This type needs to be here to allow for the usage of Standard.Table functions. Ideally, it would be an interface within Standard.Base and @@ -125,26 +117,9 @@ type Infer ## Read the file to a `Table` from an Excel file type Excel - type Excel (section:Excel_Section=Excel_Section.Sheet_Names) - - ## Given a string parse into an Excel Range - parse_address : String -> Pair String Vector - parse_address address = - parsed = Panic.catch IllegalArgumentException (Range.new address) _-> - Error.throw (Invalid_Location address) - Pair parsed.getSheetName [parsed.getTopRow, parsed.getLeftColumn, parsed.getBottomRow, parsed.getRightColumn] + type Excel (section:Excel_Section=Excel_Module.Sheet_Names) ## Implements the `File.read` for this `File_Format` read : File -> Problem_Behavior -> Any read file on_problems = - reader stream = case this.section of - Excel_Section.Sheet_Names -> Vector.Vector (Reader.readSheetNames stream) - Excel_Section.Range_Names -> Vector.Vector (Reader.readRangeNames stream) - Excel_Section.Sheet sheet skip_rows row_limit -> - Table.Table <| - if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit) else - Reader.readSheetByName stream sheet skip_rows row_limit - Excel_Section.Range address skip_rows row_limit -> Table.Table (Reader.readRange stream address skip_rows row_limit) - - file.with_input_stream [File.Option.Read] stream-> - stream.with_java_stream reader + 
Excel_Module.read_excel file this.section on_problems diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index 29de6f6a3b51..b0eb47479581 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -13,7 +13,7 @@ import Standard.Table.Data.Order_Rule import Standard.Table.Model from Standard.Table.Io.Csv export all hiding Parser -from Standard.Table.Io.Excel export all +from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel from Standard.Table.Io.Spreadsheet export all hiding Reader export Standard.Table.Io.Format diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index e6ef065a5107..51d90a751f1c 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -8,7 +8,7 @@ import java.util.regex.Pattern; public class Range { - private static final Pattern FULL_ADDRESS = Pattern.compile("^('[.*]+'|[^'!]+)!(.+)$"); + private static final Pattern FULL_ADDRESS = Pattern.compile("^('.+'|[^'!]+)!(.+)$"); private static String[] parseFullAddress(String fullAddress) { if (fullAddress == null) { @@ -29,13 +29,13 @@ private static String[] parseFullAddress(String fullAddress) { private static final String ADDRESS_RC = "R(?:\\[\\d+]|\\d+)C(?:\\[\\d+]|\\d+)"; private static final Pattern RANGE_A1 = - Pattern.compile("(" + ADDRESS_A1 + ")(?::(" + ADDRESS_A1 + "))?"); + Pattern.compile("^(" + ADDRESS_A1 + ")(?::(" + ADDRESS_A1 + "))?$"); private static final Pattern RANGE_COL = - Pattern.compile("(" + ADDRESS_COL + ")(?::(" + ADDRESS_COL + "))?"); + Pattern.compile("^(" + ADDRESS_COL + ")(?::(" + ADDRESS_COL + "))?$"); private static final Pattern RANGE_ROW = - Pattern.compile("(" + ADDRESS_ROW + ")(?::(" + ADDRESS_ROW + 
"))?"); + Pattern.compile("^(" + ADDRESS_ROW + ")(?::(" + ADDRESS_ROW + "))?$"); private static final Pattern RANGE_RC = - Pattern.compile("(" + ADDRESS_RC + ")(?::(" + ADDRESS_RC + "))?"); + Pattern.compile("^(" + ADDRESS_RC + ")(?::(" + ADDRESS_RC + "))?$"); private static int[] parseRange(String range) { for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) { @@ -109,6 +109,7 @@ private static int[] parseRC(CharSequence address) { } row = Integer.parseInt(address, index + 1, endIndex, 10); + index = endIndex; } int col = 0; @@ -205,11 +206,11 @@ public String getAddress() { } String range = - (isWholeColumn() ? "" : CellReference.convertNumToColString(getLeftColumn())) - + (isWholeRow() ? "" : Integer.toString(getTopRow())); + (isWholeRow() ? "" : CellReference.convertNumToColString(getLeftColumn() - 1)) + + (isWholeColumn() ? "" : Integer.toString(getTopRow())); if (getLeftColumn() != getRightColumn() || getTopRow() != getBottomRow()) { - range += (isWholeColumn() ? "" : CellReference.convertNumToColString(getRightColumn())) - + (isWholeRow() ? "" : Integer.toString(getBottomRow())); + range += ":" + (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1)) + + (isWholeColumn() ? 
"" : Integer.toString(getBottomRow())); } From 3c1140beaf6b9386fce337d052e44457059a6643 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 29 Apr 2022 18:23:14 +0100 Subject: [PATCH 08/28] Working on Excel still --- .../Table/0.0.0-dev/src/Io/Excel.enso | 33 ++++++++++++++- test/Table_Tests/src/Excel_Spec.enso | 42 +++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 test/Table_Tests/src/Excel_Spec.enso diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 122ec023e356..57b3a0f592f2 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -27,6 +27,34 @@ type Excel_Section type Excel_Range type Excel_Range java_range:Java_Range + ## Gets the name of the sheet + sheet_name : Text + sheet_name = this.java_range.getSheetName + + ## Gets the index (1-based) of the top row of the range + Return Nothing if referring to a complete column + top_row : Integer | Nothing + top_row = if this.java_range.isWholeColumn then Nothing else + this.java_range.getTopRow + + ## Gets the index (1-based) of the bottom row of the range + Return Nothing if referring to a complete column + bottom_row : Integer | Nothing + bottom_row = if this.java_range.isWholeColumn then Nothing else + this.java_range.getBottomRow + + ## Gets the index (1-based) of the left column of the range + Return Nothing if referring to a complete row + left_column : Integer | Nothing + left_column = if this.java_range.isWholeRow then Nothing else + this.java_range.getLeftColumn + + ## Gets the index (1-based) of the right column of the range + Return Nothing if referring to a complete column + right_column : Integer | Nothing + right_column = if this.java_range.isWholeRow then Nothing else + this.java_range.getRightColumn + ## Gets the address to this in A1 format. 
address : Text address = this.java_range.getAddress @@ -113,7 +141,10 @@ read_excel file section on_problems = Table.Table <| if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit) else Reader.readSheetByName stream sheet skip_rows row_limit - Range address skip_rows row_limit -> Table.Table (Reader.readRange stream address skip_rows row_limit) + Range address skip_rows row_limit -> + Table.Table <| + range = (if address.is_an Excel_Range then address else Excel_Range.from_address address) + Reader.readRange stream range.java_range skip_rows row_limit file.with_input_stream [File.Option.Read] stream-> stream.with_java_stream reader diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso new file mode 100644 index 000000000000..86bd07a7807d --- /dev/null +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -0,0 +1,42 @@ +from Standard.Base import all + +from Standard.Table.Io.Excel import Excel_Range +import Standard.Test + +spec = Test.group 'Excel Range' <| + Test.specify 'should be able to parse A1 format' <| + Excel_Range.from_address "Test!A1:D5" . shee + IO.println <| Excel_Range.from_address "Test!1:4" + IO.println <| Excel_Range.from_address "Test!DD:XHZ" + + Test.specify 'should change Nothing to "Column"' <| + strategy = Unique_Name_Strategy.new + strategy.make_valid_name Nothing . should_equal "Column" + strategy.invalid_names.length . should_equal 1 + + Test.specify 'should not rename unique names' <| + strategy = Unique_Name_Strategy.new + strategy.make_unique "A" . should_equal "A" + strategy.make_unique "B" . should_equal "B" + strategy.make_unique "C" . should_equal "C" + strategy.renames.length . should_equal 0 + strategy.invalid_names.length . should_equal 0 + + Test.specify 'should rename duplicates names' <| + strategy = Unique_Name_Strategy.new + strategy.make_unique "A" . should_equal "A" + strategy.make_unique "A" . should_equal "A_1" + strategy.make_unique "A" . 
should_equal "A_2" + strategy.renames.length . should_equal 2 + strategy.invalid_names.length . should_equal 0 + + Test.specify 'should preserve existing suffix' <| + strategy = Unique_Name_Strategy.new + strategy.make_unique "A" . should_equal "A" + strategy.make_unique "A_1" . should_equal "A_1" + strategy.make_unique "A" . should_equal "A_2" + strategy.make_unique "A_1" . should_equal "A_1_1" + strategy.renames.length . should_equal 2 + strategy.invalid_names.length . should_equal 0 + +main = Test.Suite.run_main here.spec From 4f7d8a09aa236025faba392cf6c671eb86670d19 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 3 May 2022 14:24:05 +0100 Subject: [PATCH 09/28] Range tests --- .../Table/0.0.0-dev/src/Io/Excel.enso | 21 +++-- .../org/enso/table/format/xlsx/Range.java | 23 +++-- test/Table_Tests/src/Excel_Spec.enso | 89 +++++++++++-------- 3 files changed, 78 insertions(+), 55 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 57b3a0f592f2..4e298e484d1d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -87,8 +87,8 @@ type Excel_Range for_cell sheet column row = col_index = Excel_Range.column_index column - col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row + ".") (col_valid _) + col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") _ + all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") (col_valid _) all_valid <| Excel_Range (Java_Range.new sheet col_index row col_index row) @@ -98,10 +98,10 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index 
right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ - right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") (right_valid _) - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") (right_valid _) + all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) all_valid <| Excel_Range (Java_Range.new sheet left_index top right_index bottom) @@ -111,16 +111,16 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left + ".") _ - all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right + ".") (left_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ + all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) all_valid <| Excel_Range (Java_Range.new sheet left_index 0 right_index 0) ## Create an Excel_Range for a set of rows. 
for_rows : Text -> Integer -> Integer -> Excel_Range for_rows sheet top (bottom=top) = - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom + ".") (top_valid _) + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") _ + all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) all_valid <| Excel_Range (Java_Range.new sheet 0 top 0 bottom) @@ -131,7 +131,6 @@ validate : Boolean -> Text -> Any validate ~validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) - read_excel : File -> Excel_Section -> Problem_Behavior -> (Table | Vector) read_excel file section on_problems = reader stream = case section of diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index 51d90a751f1c..28be7d9f869b 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -72,23 +72,29 @@ private static Optional parseRange( }); } - private static boolean isLetter(char c) { - return c >= 'A' && c <= 'Z'; - } + private static boolean isLetter(char c) { return c >= 'A' && c <= 'Z'; } private static boolean isDigit(char c) { return c >= '0' && c <= '9'; } + private static int skipDollar(CharSequence address, int index) { + if (index < address.length() - 1 && address.charAt(index) == '$') { + index++; + } + return index; + } + private static int[] parseA1(CharSequence address) { int col = 0; - int index = 0; + int index = skipDollar(address, 0); while (index < address.length() && isLetter(address.charAt(index))) { col = 26 * col + (address.charAt(index) - 'A' + 1); 
index++; } + index = skipDollar(address, index); int row = index < address.length() ? Integer.parseInt(address, index, address.length(), 10) : 0; return new int[] {row, col}; } @@ -165,10 +171,10 @@ public Range(String fullAddress) { public Range(String sheetName, int leftColumn, int topRow, int rightColumn, int bottomRow) { this.sheetName = sheetName; - this.leftColumn = leftColumn; - this.topRow = topRow; - this.rightColumn = rightColumn; - this.bottomRow = bottomRow; + this.leftColumn = Math.min(leftColumn, rightColumn); + this.topRow = Math.min(bottomRow, topRow); + this.rightColumn = Math.max(leftColumn, rightColumn); + this.bottomRow = Math.max(bottomRow, topRow); } public String getSheetName() { @@ -211,7 +217,6 @@ public String getAddress() { if (getLeftColumn() != getRightColumn() || getTopRow() != getBottomRow()) { range += ":" + (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1)) + (isWholeColumn() ? "" : Integer.toString(getBottomRow())); - } return sheetNameEscaped + "!" + range; diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index 86bd07a7807d..dd62832d44c4 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -3,40 +3,59 @@ from Standard.Base import all from Standard.Table.Io.Excel import Excel_Range import Standard.Test -spec = Test.group 'Excel Range' <| - Test.specify 'should be able to parse A1 format' <| - Excel_Range.from_address "Test!A1:D5" . shee - IO.println <| Excel_Range.from_address "Test!1:4" - IO.println <| Excel_Range.from_address "Test!DD:XHZ" - - Test.specify 'should change Nothing to "Column"' <| - strategy = Unique_Name_Strategy.new - strategy.make_valid_name Nothing . should_equal "Column" - strategy.invalid_names.length . should_equal 1 - - Test.specify 'should not rename unique names' <| - strategy = Unique_Name_Strategy.new - strategy.make_unique "A" . should_equal "A" - strategy.make_unique "B" . 
should_equal "B" - strategy.make_unique "C" . should_equal "C" - strategy.renames.length . should_equal 0 - strategy.invalid_names.length . should_equal 0 - - Test.specify 'should rename duplicates names' <| - strategy = Unique_Name_Strategy.new - strategy.make_unique "A" . should_equal "A" - strategy.make_unique "A" . should_equal "A_1" - strategy.make_unique "A" . should_equal "A_2" - strategy.renames.length . should_equal 2 - strategy.invalid_names.length . should_equal 0 - - Test.specify 'should preserve existing suffix' <| - strategy = Unique_Name_Strategy.new - strategy.make_unique "A" . should_equal "A" - strategy.make_unique "A_1" . should_equal "A_1" - strategy.make_unique "A" . should_equal "A_2" - strategy.make_unique "A_1" . should_equal "A_1_1" - strategy.renames.length . should_equal 2 - strategy.invalid_names.length . should_equal 0 +spec = + Test.group 'Excel Range' <| + check_range range sheet_name tlbr_vector = + range.sheet_name . should_equal sheet_name + range.top_row . should_equal (tlbr_vector.at 0) + range.left_column . should_equal (tlbr_vector.at 1) + range.bottom_row . should_equal (tlbr_vector.at 2) + range.right_column . 
should_equal (tlbr_vector.at 3) + + Test.specify 'should be able to parse A1 format' <| + check_range (Excel_Range.from_address "Test!EE4") 'Test' [4, 135, 4, 135] + check_range (Excel_Range.from_address "Test!A1:D5") 'Test' [1, 1, 5, 4] + check_range (Excel_Range.from_address "Test!1234") 'Test' [1234, Nothing, 1234, Nothing] + check_range (Excel_Range.from_address "Test!1:4") 'Test' [1, Nothing, 4, Nothing] + check_range (Excel_Range.from_address "Test!CB") 'Test' [Nothing, 80, Nothing, 80] + check_range (Excel_Range.from_address "Test!DD:XAZ") 'Test' [Nothing, 108, Nothing, 16276] + check_range (Excel_Range.from_address "'Hello World'!$EE4") 'Hello World' [4, 135, 4, 135] + check_range (Excel_Range.from_address "Test!A1:$D$5") 'Test' [1, 1, 5, 4] + check_range (Excel_Range.from_address "Test!1234") 'Test' [1234, Nothing, 1234, Nothing] + check_range (Excel_Range.from_address "Test!$1:$4") 'Test' [1, Nothing, 4, Nothing] + check_range (Excel_Range.from_address "Test!$CB") 'Test' [Nothing, 80, Nothing, 80] + check_range (Excel_Range.from_address "Test!$DD:$XAZ") 'Test' [Nothing, 108, Nothing, 16276] + + Test.specify 'should be able to parse RC format' <| + check_range (Excel_Range.from_address "Test!R1C1") 'Test' [1, 1, 1, 1] + check_range (Excel_Range.from_address "Test!R1C1:R5C3") 'Test' [1, 1, 5, 3] + + Test.specify 'should fail gracefully for invalid patterns' <| + Excel_Range.from_address "Test!$$QA1" . should_fail_with Illegal_Argument_Error + Excel_Range.from_address "Test!BADADDRESS" . should_fail_with Illegal_Argument_Error + + Test.specify 'should allow Range creation for a cell' <| + check_range (Excel_Range.for_cell "Hello World" 123 14) 'Hello World' [14, 123, 14, 123] + check_range (Excel_Range.for_cell "Hello World" "DS" 14) 'Hello World' [14, 123, 14, 123] + Excel_Range.for_cell "Test" 123 14 . address . should_equal "Test!DS14" + Excel_Range.for_cell "Hello World" 123 14 . address . 
should_equal "'Hello World'!DS14" + Excel_Range.for_cell "Test" 20000 1 . should_fail_with Illegal_Argument_Error + Excel_Range.for_cell "Test" 0 1 . should_fail_with Illegal_Argument_Error + Excel_Range.for_cell "Test" 1 10000000 . should_fail_with Illegal_Argument_Error + Excel_Range.for_cell "Test" 1 0 . should_fail_with Illegal_Argument_Error + + Test.specify 'should allow Range creation for a range' <| + check_range (Excel_Range.for_range "Hello World" 55 120 123 14) 'Hello World' [14, 55, 120, 123] + check_range (Excel_Range.for_range "Hello World" "BC" 120 "DS" 14) 'Hello World' [14, 55, 120, 123] + Excel_Range.for_range "Test" 55 120 123 14 . address . should_equal "Test!BC14:DS120" + Excel_Range.for_range "Hello World" 55 120 123 14 . address . should_equal "'Hello World'!BC14:DS120" + Excel_Range.for_range "Test" 20000 1 123 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 0 1 123 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 1 20000 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 1 0 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 0 123 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 10000000 123 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 1 123 0 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" 5 1 123 10000000 . 
should_fail_with Illegal_Argument_Error main = Test.Suite.run_main here.spec From bbeb097d6fcbb8a53396868e40d12eece553917e Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 3 May 2022 14:38:33 +0100 Subject: [PATCH 10/28] Rest of tests for Excel_Range --- test/Table_Tests/src/Excel_Spec.enso | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index dd62832d44c4..275841896889 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -40,6 +40,7 @@ spec = Excel_Range.for_cell "Test" 123 14 . address . should_equal "Test!DS14" Excel_Range.for_cell "Hello World" 123 14 . address . should_equal "'Hello World'!DS14" Excel_Range.for_cell "Test" 20000 1 . should_fail_with Illegal_Argument_Error + Excel_Range.for_cell "Test" "ZZZ" 1 . should_fail_with Illegal_Argument_Error Excel_Range.for_cell "Test" 0 1 . should_fail_with Illegal_Argument_Error Excel_Range.for_cell "Test" 1 10000000 . should_fail_with Illegal_Argument_Error Excel_Range.for_cell "Test" 1 0 . should_fail_with Illegal_Argument_Error @@ -50,6 +51,7 @@ spec = Excel_Range.for_range "Test" 55 120 123 14 . address . should_equal "Test!BC14:DS120" Excel_Range.for_range "Hello World" 55 120 123 14 . address . should_equal "'Hello World'!BC14:DS120" Excel_Range.for_range "Test" 20000 1 123 14 . should_fail_with Illegal_Argument_Error + Excel_Range.for_range "Test" "ZZZ" 1 123 14 . should_fail_with Illegal_Argument_Error Excel_Range.for_range "Test" 0 1 123 14 . should_fail_with Illegal_Argument_Error Excel_Range.for_range "Test" 5 1 20000 14 . should_fail_with Illegal_Argument_Error Excel_Range.for_range "Test" 5 1 0 14 . should_fail_with Illegal_Argument_Error @@ -58,4 +60,36 @@ spec = Excel_Range.for_range "Test" 5 1 123 0 . should_fail_with Illegal_Argument_Error Excel_Range.for_range "Test" 5 1 123 10000000 . 
should_fail_with Illegal_Argument_Error + Test.specify 'should allow Range creation for a column' <| + check_range (Excel_Range.for_columns "Hello World" 123) 'Hello World' [Nothing, 123, Nothing, 123] + check_range (Excel_Range.for_columns "Hello World" "DS") 'Hello World' [Nothing, 123, Nothing, 123] + Excel_Range.for_columns "Test" 123 . address . should_equal "Test!DS" + Excel_Range.for_columns "Hello World" 123 . address . should_equal "'Hello World'!DS" + Excel_Range.for_columns "Test" 20000 . should_fail_with Illegal_Argument_Error + Excel_Range.for_columns "Test" "ZZZ" . should_fail_with Illegal_Argument_Error + Excel_Range.for_columns "Test" 0 . should_fail_with Illegal_Argument_Error + + Test.specify 'should allow Range creation for columns' <| + check_range (Excel_Range.for_columns "Hello World" "BC" 123) 'Hello World' [Nothing, 55, Nothing, 123] + check_range (Excel_Range.for_columns "Hello World" 55 "DS") 'Hello World' [Nothing, 55, Nothing, 123] + Excel_Range.for_columns "Test" 55 123 . address . should_equal "Test!BC:DS" + Excel_Range.for_columns "Hello World" "BC" "DS" . address . should_equal "'Hello World'!BC:DS" + Excel_Range.for_columns "Test" 55 20000 . should_fail_with Illegal_Argument_Error + Excel_Range.for_columns "Test" 55 "ZZZ" . should_fail_with Illegal_Argument_Error + Excel_Range.for_columns "Test" 55 0 . should_fail_with Illegal_Argument_Error + + Test.specify 'should allow Range creation for a row' <| + check_range (Excel_Range.for_rows "Hello World" 123) 'Hello World' [123, Nothing, 123, Nothing] + Excel_Range.for_rows "Test" 123 . address . should_equal "Test!123" + Excel_Range.for_rows "Hello World" 123 . address . should_equal "'Hello World'!123" + Excel_Range.for_rows "Test" 20000000 . should_fail_with Illegal_Argument_Error + Excel_Range.for_rows "Test" 0 . 
should_fail_with Illegal_Argument_Error + + Test.specify 'should allow Range creation for rows' <| + check_range (Excel_Range.for_rows "Hello World" 55 123) 'Hello World' [55, Nothing, 123, Nothing] + Excel_Range.for_rows "Test" 55 123 . address . should_equal "Test!55:123" + Excel_Range.for_rows "Hello World" 55 123 . address . should_equal "'Hello World'!55:123" + Excel_Range.for_rows "Test" 55 20000000 . should_fail_with Illegal_Argument_Error + Excel_Range.for_rows "Test" 55 0 . should_fail_with Illegal_Argument_Error + main = Test.Suite.run_main here.spec From 2cb0ee1d089a64ac8b050c5a2e5f6fb1058908fe Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 3 May 2022 15:58:46 +0100 Subject: [PATCH 11/28] File.Read tests for Excel --- .../Table/0.0.0-dev/src/Io/Excel.enso | 6 ++++-- .../Table/0.0.0-dev/src/Io/File_Format.enso | 1 + test/Table_Tests/data/TestSheet.xlsx | Bin 0 -> 9862 bytes test/Table_Tests/data/TestSheetOld.xls | Bin 0 -> 26624 bytes test/Table_Tests/src/Excel_Spec.enso | 20 ++++++++++++++++++ 5 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 test/Table_Tests/data/TestSheet.xlsx create mode 100644 test/Table_Tests/data/TestSheetOld.xls diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 4e298e484d1d..161d392fe72c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -145,5 +145,7 @@ read_excel file section on_problems = range = (if address.is_an Excel_Range then address else Excel_Range.from_address address) Reader.readRange stream range.java_range skip_rows row_limit - file.with_input_stream [File.Option.Read] stream-> - stream.with_java_stream reader + file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause + Panic.catch IOException file_failure <| + file.with_input_stream [File.Option.Read] stream-> + 
stream.with_java_stream reader diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index 80d9738f849c..67f5a2126ca1 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -27,6 +27,7 @@ type Auto if ".log".equals_ignore_case extension then Ref.put output File_Format.Text if ".csv".equals_ignore_case extension then Ref.put output (File_Format.Delimited ',') if ".tsv".equals_ignore_case extension then Ref.put output (File_Format.Delimited '\t') + if ".xlsx".equals_ignore_case extension then Ref.put output File_Format.Excel Ref.get output diff --git a/test/Table_Tests/data/TestSheet.xlsx b/test/Table_Tests/data/TestSheet.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d0608e7b8a38427c554178badf056f31d5d43430 GIT binary patch literal 9862 zcmeHNRa9Ktvc)>MySrOs3GNascyOn&#w9>-cXuZQNpJ|R!9BP;1PPGf9^iFy&wV*3 z=e*zdbMN|CdvuRoHTGJ2&Z@bqma;r791heICXecNG02CB96atK{ zguR^$$j-%3-NOOotk33dYeSI_2g8sD1p~SNzvDl62YQmm?7Dy`eMgCxXc`r2#Opvs z?BMPPl4k|g@B>5=)RQz3sDTZ+#6}rKXhEWjg(}2%cRr4$==}$M=FDN1t;|v#d*LX0 z+jWj-s;x@7X9Dq*jK#?pXmZuiI9n4JGY-v&euNeREK-U%-wJz5LZpp1V0l;5Z{`^` z=Xg`aG?01V`SvdCK-;ALZ?kkzIf#V26j)h7O5lf9+HfNkGxVj8v!yspa9}1W1t!fG zk4}SbXaI{Mf-=RA#C;ewQ*TyomW)!bh6=-_C7!Cp8>n{^IN5F&$3~(b;rANHqsfV6 zrX$r>rl4x(eZ&y6DuaGkEu|AFP1vw2yq+7NQdH<`9Czs@do3B$DdLE+CJ^_==b;t3#I%Qa@J`8sm~#ZQ-E*}8A47&Cy zzoh&RX5ycgUY4M!)D1)lIePv*bl_@cB@RPe-c9Uz3#Gb`pX@SvUG!TT;*}115)5^s zKv)^yHlOc9ODlrW`va7ho7`oQSU3XI^={=Mske@<@Jw`0DbkK*8$Fn>W-ewf(`Dp5 z7+$r-GL<&Idm}%%MlC&cB3X?w#->Sxh+Rk$f+v*juhXZfy=rn@0X-wG_O(2us^MAQ ze&SfB?_5gJ7gUijK83@{bld@F6N`lkuK^p%iyI;hHA_CLDw7;1eku<`Gn{X^=YyXxPOt9BSe8?=x2%k-YQa)` zF{OTQk#5~yQVTQ3$EK}Vw9jlc!~Q|!G0ld+*T?Ko8``fqyCfJx@!f&#%Q&Ql-7-p^ z!!p9kDnCZw$)ynNtOfvOK9lPdP_M14_9c8!Ty8+;EI1=R4Wut+&Ee;uY65+}6!7$G z*h^6HM~yTQx9F7W66i1)9o3V?z!JISV0t?O#IhK2!^dXjdw?{Uz9d^vEsGBO(6 
zTV)`yBii__=$HaEU%t6$$;ecM@>BsTH#$i?d}W|l z*2-f)#+A+rzOHWT%U9+%crhc@*i4BE)|6eT!5tlx^UEwPR6p#-GFZ}x0T_@X;F4?h z`o=MRGAlS28r-<=H-ge6i*ahj)APTIyv9;>iZrAUqX_iU?7)wMHWQjAM4#x^O&X!* zlC@!du9CNqvuowQO=-K)lCxD9{^ji+F>m(@(6kY|m+cDcb;)Af-rI8KR#LY(%vapL z#V6*6!Wmu@d7)yfg z#kC+|+>Zq5^~fu@76I)T14L!d@f8N0OWJAML41xlmSmhJqxDfucv7-uy~3Pe={ z$edgs(W;{g(OO?7S3}H(nk^WYj@eK>4R?mIb$q@AGBTYpFdb4C`iTGa>hd8jFb5`C z9Go!R7088d-jY~4M*#Qk(~~E@*`s3=$QF@YW{T&q;PDBW0+dujoP4iyFIclQf5CK{ zUJ&yw{WMfC1Labhr?e4R+JrCTD}2$;Qp$VQ!+8Y~_VC1k`26@(^=seS{dS!0lgmUF zUa#VK2F+~aw_2xxzK zxJ`_noV)YIX5qJ_s_{KPQ;zv!=i1RohSyg>x~jjS$ge?rdp+N;rKh<>_*C@ca9B>qSb&XypMi!=Mr3&)QlzwEVY=181=P*fE(3 zL}Mqj|(8wmz&_^Ifvx^&(b0{n!q1A180u~K1kP~i{I}Rj|Dl#@> z9U>N1+G874yeXcFBl(i4MB%U-)Gun)$GM zYnff0cqH2f4#S~ySL%#PkT8y~%ZtRB_6k7?6$_~o?1B33LE^N^nS+8u$cbPBE#?fv zh=tqwK?@NJhI7F95jn{AkN>LNzZAhCLK_lWoi$xZSH(P*{_EZ#B>Z%Ck^>TD1KS|- zNNY%&%Jh6!~L*ksOlj4K3|W z6VO1~+C5N2y{X<;vQ7axRgQBagJ|0BfxT@Eph7Lrdu@NVCf?I z`$sM-qi~qRqT~AZ5+No3hn?rhq-&+eD`Xu3%a_og$VSn(rm7{E%YOI-DoHWUJr6%K z;2)qDLkpz9kg7D{6~!#nK~1Q2!jw~Nu8AgCT5X5r_Un;afqqol86{6^NAG6&%)*yR zR%x@3A(rklq|*Jqkiiq1NyGClOsv#6OCX6{t|y&ymdkxZs@r=bX(O1c%x95yHU?ZC zTW4J!u0`F1&#~^&X97>{E@YYr#7oJdajDpBE<~JKFf*es;z3L-hZ-jJTfgvgg|W;x zaF2&-MuI26s$s?~a)v6)a1s!kKU~v&QVB0e>o*M(LWh^{hP&ifoxfQ zN`p>(jP@6hjqIcNQP=tz_FXJNwjlPO^UsKXs684<{0yf9|5gOW+3h>eRxIu6`j}1P z3XNHI5=mYCp~_1x&V*KCY#3U;3sr`K!c=j4p`=MsSk$)jSVWrIFXR*Sqx00|+ccRr zayqY2BV``iii@vud|scqoJ_W*y_QKs?n+28Y*#)N%8Hs#qnS$a1@kg+dnr9nioygT z+T9?te3xI!^>MRQ!nHwwiL$3RCnfbEYOEN^JVX2|Ki|Yr=xzd7}}CMAi1c1p8p`(F3uQL`F7?_i{_EWFy898q5g*^(vsNV zxjc2>st|IN=5;CLN-;)Zz3$L#^J0sGxjY<0-R?R0uLjBu=U`{XONv9<)pF_k#f(nD1>1ud#)+d5I-IEA77U{q;jHwSxBl*SQi8Y3C_)_ay{ zrx390L$K(K8r2z58#p2zfpn ze>n^75b{0WyXiKpX{uwAw}YouQ~Nd(YxsD7kH2cz@o>C<^6bkr{Yh6($M>7z)Q-m! 
z{)?*3WnA`l@6*#SiTG>Y7rRQaq>D==q-n>L;nl|#8>UBek;eXLcPG#y**!qnkB=R4 z6jlEzofLs!90Bl$Xl@$A-=B}jgUKlFk~tOE&U3+;|fzvpR!ywl!f zQzQXbywI!pXk|g?MTLZc9v?b1N~w6aKv{gR*aZPeAqf)Y8jj@##QLs#bE-zbXX}h4 z`mkHjC(`QHsPr~+e3yW#NYJ8S{e0bMIF|tb+56A`8az##`cRNe{ zb#@Gz;yMq^Hu>ksDaJX#9x$P$L-rsVLE9qrHQqNbeFtVA`w#s_hO0tf+u;bktO+SD z8!ksNRKNj(#vuquDEKP@VYm-^lG&v?Hs6vm=QE$8!{SK4Y{O3c9Njw>UPJ%gZHUYb z+W83}R`=vCi9eG8v@7ZV0r85Q1^n7FXZfQ;LYecdhlaDk%=P14iTRX)a3$yyIQ>kg z@8a@8%sRc*%YTUO=yWefskM5Wi!$lkFrc?Z+|3)Vbs`EL_-;w1nZC&y`V-Z422UEx zauPM`mLr`AJ?CQVIEs!0Xys#8gu;coI~QG^X+fwfGj8s9`Q3{%_t@yE7Zy!r%^B~J z0SPzmjC#=$sd7boIzmUQ<1 zMG@To)sWRU8I^m2B;S@<(+@*fXDdULnLN`G!$n}gowJkLDrAK~CFcdX z|AtzH!$V2$8fh8L0!ddJd30p zxz>nDzN8px{z&Cl`Dtc3T|D6xBNIx@8IDWuF||hCl&M}1KA@wuA(dla@p6nL(8o*=J0O*g0CHXd5cx|l*R2+ zsTpo3i3gIN8bvNAYDXiiwer!kKKd})z^};7vnCN4cVMUos=mRjFAaW0@N)RRN^jql z34PrtUPTXW>fjby&v;zwl~rZ`9NV|8ZPw9X%*aRu!x8+tjoZz3y#+7FBTS0yySkw~ zZQEDY!RB>B3~+5P-i{y6@&_A~>mH%LUa6WO5j<|AMnzkku~R;HhE(U&po?Hl3-WoZ z{SH1^PT*Yw@qOMs1+%HqHlw#$JJgkROa0?N7Uei!h!%E2RA@0og`)l|{W`mN*npgW zNY4Tdefvxxj(2YDqtQ(^k>x-F!n>5B=gH01ssdJ(XlXqqB5@p~3$>5!V2Yr3Q?st@ z)!t3Q!dF>f`gZF=(Xo+fy;3-Md|_#Y?dEms)OGgQ^oEb`>eX@8Eqk%NjCk7R);c1= zo*0M5V`)-QZ}#Di%FAB{P2c3m>KN`tFkY)#Q@;-`T!?SxjKZs1q5&X!`@W#rM!G^^ zPVaaksd_f8ZcqV|VqJ6U{Q9Q5>-kRUQubts9YEPOkZ@FmctkNw$62r-vYl7vba_uF z^%BM*oJ~IGL#j)vnXILcILzk;N&x0b_lHH@GwA|Cj>B|;I|rgvodu%pW)-{N~4fMebwU4@Wk9Ox^k?$8?W;=s7MwZrTPMF`HAdqK1r zcFMg}5jxv@a*IIDv{MdQ5_RqCilr-{L)VtIF}Dv;EKvT=q41ny3vOsI9Rt1Mt|P@_ z-ZosFn#;;Q60TdwiHf9Hc>wtJ{MlR;Q3JJ(#-_vC z04tZaTXg8`-gDT|a6AjSC?3i=NyK)yOhE0{M`T-KHWtke6=-1OWkKnYP2D$Pw}{wc zzg7UV74_lPHAiRh4gJew=@8Rdi-$5>`1tjy@1GXX^FI=n)w%JUvqZh)5bPAOnn=du85jdHE*ycRYljG0 z_nN&gZqewq#mlK1%Z~(r_Eh~_|7V#~!3}AK(gt{d4Rc}LHA!vm4YIbAuP3q?=ap&H z3J$L4`+MWuNOYa2=u&ZSu|J=qCq6(B{r4{}Wr8-%9tg(45GLUKfptq`Cy<$% zi<6a|#ZSnajdxl@%JfgxJ&liyTnw`>fzpn8i$OIjOYjcZbsRlT){G#}I}enX4zZ8) z;pArZhQ{h{3WwA)RXQ*8;+p3@n`a&P$qd%Ss(JTU2oS;7(jn#E+63k*Hz|!FVrGk$ zqbq)$C!8;ydpLzN@(yz7^C%(ekHO{#sZ^HkE5h$0s^eQEEVsb$BzBC>e-Gq2EXVYe 
z#CzLIbf5Wn2#cL^IapZxLeHv2pYhI{QBiHEDX1c-IKSSzZ0|eE9i-;=_f;pJvb3;1Ocpz#tB3lgMy9obUrD zANao3J7ZN=xQ}URo<*w6 zer-8{B{F)39OEgjYwS9y7-VCncX8fPkgO(a*vK5phLn=;0#`K6I;3}ON>$PP$r6BL z$lhVKt?$4C%DqC0UUycOoY7TBH<*g!-FZLH1Z<3f1)^y$E>Q}6(z?Yfn6!%nzvt*eLxi38y zPr+PITL$&ENeFuuZBF#KtErna|D!KWx&D$rDHTx4u&xcgLH`RCx-jdlhl&mHu~zR= z8k#XvSZ1TCJ=S2eES{#45WBxiirZFh(r~1dPfCAe<_w(3gYXUhzXL8b3@aq1{PS6z zf4|*-U;p9cj(mno%?!eD#>F-{CFE9P_GLQAs z%kRae-yQrekbgPQB>L&#mt6kc^mo1S%T$8&U#7pSkKaA~HFy8Ahl1iGgM#{7CjZ_1 juW|R!=B*TeGXEnAE6W2Q82NF41sSRjA{I)hfBgC%X5IK( literal 0 HcmV?d00001 diff --git a/test/Table_Tests/data/TestSheetOld.xls b/test/Table_Tests/data/TestSheetOld.xls new file mode 100644 index 0000000000000000000000000000000000000000..ff8fac5a59eb81a8d0ca2d98b3dacd8a4bc5a2ad GIT binary patch literal 26624 zcmeHQ3tUav_uuz+s~f!~5_Nl^^wiTUy^u$Vyq`&K52d(@{Ag;(kZ1B%#%m(-%scX) zOoXHu3;EL~#7yDba;QfsP*an(DAL6_7yCHUC0NJ)=fT^lG$3??y^+h_}+(VqM%CDM6rwa!=rZ zqbai~+M`7y7+lh(krEv6$i}2-Da0`2wI%AL6(N>8EUoOVteks}caWT#@j5_^Z_Bth zE<`wG0<(Y_(S#0H&+O>T> zgC*Pxb?^ywFyq-1ThbXB8;Xr*oFiSTyce-2-J}J90xVfHr9qsxcY{DTVn>`90wvsa z@+GX4*ap5zVg#*C$Oo~2`lS!&1S&dO;{V?c93r4ij7KhwuMVe_{g2e$;%%YqFH1k5 zfS#^^zDWT+O96e40{UJBbVd5CQ&2un0bLQkqVk6n;QY7sIZ4TFEz@3S6wr?09txroCefCi}F#e(aYK?)x`j+ck5+2pY}sd3B8;Qfm3rye}v&4jYHFoC3LL@ zx`2>WctFtl*V9>YbtL8M?L+$yj4mbkli;+S1V>v!uh*01pV-!+Q?15L9Su*Wou0lQ?g7_g5uivfdTvlx0e zm%*;N4ED`saA+=rV{;jLG?$^L6a#ixX4K#$^L<%i1{Yq`>cb41fy=LpKiHFTKrMhI z3xX>~1m4mo81;oWw8iXA+-2kM)tz@brt2#_7>Qu*Y2!UP9G#Hl$^~W1Uzh4(C zuD^{GNH)=s4-8&hjyuH0;lw#5(N+GRICzvzS z$}?%QO4&*&^;fb2bIVhTX_f!XQtaO70ZP#w39T=qlth>~l~Zpi*_w5!uNnG3Cb=%1 zlao^qBJlyHX(hs#w5FSz-7&pD4Z34u&7iHMeL%JrFbS`78mxuP_(#$)#o`i~TIA>F z%Qy{_CaDF|0Gmn@r(x1$$5=42XVe1yK+3eq{1L4F*b5kV_2E^f7HD^@9y6$+-6b@- zH_8jSmcZ2za^XU@ktHCC;}(vv8$e{VLmR6RVFXh<*$4{b{>DK{0+oZbZwd%I__KSD zl@thE$hQd&G8NkdXJt*WiE@xOO#vwhR1VUiDIhMc%0aB9K;S~YO>i8o*d{o`X@X6Z zgTOqnagdTg&XAj66XhT<&u<*0Bv3hspeZ0OuF63=OM$?Je4F5ePq9sK zwWtX;Q4V6-6p)fYdu?L_i!M`;u_@CnRyy3nVlEhiDT`o-pEW)jP2|p5xO7MgbjhF$2XIu*JB@ 
zim_B8hMt2^V$_8ZpP$~AFUF89##L5KCnaL&SqddaLm2Vu)hqd8TCv5r$%^TsL=4Vj z>Wro-j1a#okuS!GEyi6|jH41UI9;d{!xu(8sHv4NrZroPhpZSEC1U7dEoC${Cs`c=M|Hf%9+Jk3gp7^bI{1paw-pL{WG*<$2)T4yC< zn4VS=c>n%=`C?4iV&r(5of0ujPb&!&Kieo@Ogpw1IiBXEL=4l@N&*Y-yp}J*Z3T8S8@r@6SkKbR)J zHfC%way-pOi5RA*xwsY=7t0r8&K4ub)9jUqVS1X2tN3n&d@%yH7&)HitV9gc(_CD$ zE*uxRyXY)!CYMYybssVT0PT zK$#u_RX#`LgLY~F1>azUI

Q{y@mMtP1&{of|;G6WE|#S)k0;CuI7*0C}K%VgpZC z;ukh0liyvs&mJl5u zm&yND8jx!&Zb|6u4izd zF!PEpb)_YBwVJC3+cMzwg@2SVJVxLzjE&9Yr?B*DBW?kDMZsV&oW#J;&>zlX;U7~g zx$Yy;3fG@B>b0V;BwEo|60PW~)v#!5m#QYt(Y*<`aLcC~>V7M?TA(I~<4aIdA>yy4D3HIAz+s9Gu9}kMm0h7YWiT={-6;0el#c=2@2A0ArMXzI!vG8Q)xJKMNQC*4uq?@x-iLL zfJ5wKgB!>Jr{kXjaIyFH6ID zaEym^i%FSfHMa$EhlV5r;-)8pD>#+@WKkroc0iy2LmwHR0KYSfrXyYQ1KLAmPqQG4R2KH93EUMl|XlNmDZI*K;&TX zbP*LG5US4l95{vOT;E^1VSgDMpCHWSPnO2uwt)VE7zv;hf%-`zFD_I|LK-eIHw0Sx zN)poOD@jPBuU2zglI{c+wSp59s1~2i651M|gDD%xwmCE8V}z7yb0joqLSIQVp|2#G z&{r5Rd!(v^?X0C$P*;bB7y@+`)ldAC&Yrax=M4Y=|S@h2k%ILhC_SF zdOHUpdQlMZ3FrZ=wFfm53IYvIw?2<@Dyj7aw7VpZ(6yCZp^F(1F9f9wuDGM3M7cYktI<}URcG4IA!4#{Ly3uW052SYG(ldh6BiCsp|S%uaPMH;L&jy`*&NsdxgR!7!(Lxr8`z&I;b{w<7m^0Xw(1gHop2LSsi2HB4l=xGXgUq*@C=ysCO3HiL?j&R`5H{KRzx27Pdl%&zUZqIl(b$TI}9cR|j8o*6&-} z|MP-shk@DoKKza^=3icMdHcL`KW(%&F8(=x*NDGs1Ivc{OC>d)0kbr_?X0y4Mni{?fK4eO0$7#r}7y)s7wPFxu;N^`{Nik28)G zczX=qJJ6rM;PAQ3@|U@v`V|}dIbJ#0`GeyRcIn%$jkz%VlgYv76NTQfW${SNJI%_ZbePQ8(o5@4VF1GDB%PA+r-)ToO zsj^9lvNXMKT5~`6vPO$j{Inl_uifqSb^p>&Rcn3~7;oC6aq+Rg_tmJXSzc#T<`*yg zVOK}~xTNXd?U;N2Xz1x7H+S^9*3~qp|1syahI)(d2A>Yu?>FUUv3}XWzW3cGTc_+= zH6!$n`G)UJDm?;keJ~EW<2h;n%G_e{;fu~C-v^&hIbdJimlx-`CNTQx(K?j;9bYdKXUGRFO4EXN-Nerp^-KHOIxr z-^{b$Wtw}hfBPTpW2-K=e)b}%ZSj%itsV{dY39&mtBQ<;`;I;k+&*eoVez~DgNY}t zr+O_781?*Ct?lE!HFlj^aJ$J@esI~en>XMYanPI)>4^62tFj=Km)4KDaSgVkbr~L<%J371;`G(9&u*)3gq~79(zE0}@s@Z1KZY!tV z`!e%zfp6B=#kwcU9BqfbRqMCP_!#jYl*|Wklv~Kq1N_9^N%KnZuS1AQ}Rkni>gHhp`|Hb zi>`0hIpWo3yJ-1di#s<{W}ZBGE-HU_?bGvu`$vX%S>NT<#&2}Cidx^lK3GR`i0_SN6QtK4lWLdTw;-6QDW_Jw!0 z8ih_gzuxp{zpJ+o_}ZSypMSu3*0`KKIiU;u20kusQ7~kGtb3bX*977HYO~*rJz2gx z?)utaZ5DrLUr=rwZhSTIkny(gqMl@k&l2l&ukM51wAHa#lC?OK)A?a=|cm$m8i^}&tMGkT+ z@qAWp+|^R9S&4pPRlA=(s#@ueDN;Y}>2>VHN59@{FCF^hqH})ggrcYgGj?&z_7rNj ztlc%E@?myx&@WbLgU8SH92(pGgjbND&~W{gh{X<@0*rn%RPR<)Htg&nt)dmTygFXW zk6EwXy;ZDz;jT+Dg2!$HwrKD5xw7zMf8IA6Px9yIIowa!lBCmb!mhZgR;F6V)wk=u zOAd5!eds?d-E#ev-b-?$-y~c6w<&(OOfP8sxc+YvQf>dZXJwx@=vl_aIXb(%)3l$= 
zn!EnGrD<`;jVBUbTNMRPSsJv-Vu@j!V*!s#FFkD2ia+#ehpsQ?&)%54C^zPf+LAj3 ze`}pzcj`tkZ>7b_O>5qfp=Se6UR+=6eE9Kk&mG}u6UIcW4;VS?LG>k*4$Gq_E&SNK z$Y}F+gQAB|{?^sdSvT6_#sF`=Mc&J>QIouUX6@Zm)3#;Q>fxb>H(A!EAM`u6NbT5n z!LyI8Ns6#pp&z>I6FD7s@3hW{>l+8^y_i0ktH(c*qVF_#NBb6~VZ7S0`vNTTr>-wB zex0;=#?_QICofl?>2)k>eB~LQ$-HQj{x5fyXimEoP<~IKx1slZjf-pJd@qSo9Jl|r z$2hA;b=cvc_F-=*xs{nnVwfskdy0{z2k$ORv&jN zy_*{y(4+dM>z1fjKSc)Gyec;N@o&H2WL~eE%X%Elefs`Ht4OPJ)@cX!l$_TvcC74w zH7UFJM!NP~i-D8ehQD8%=~FU7^TMDFsp+dG&iZX+Qjg^Yv1bh4AB zpf(|B#)P@O5)0hQ9V?@krWwR}XiYPDP?@f8miFt--6Ng+p5&j|U+sAGx$Us$Uj#=) z?JDk@Po6O%bips8xUGja+D5(HeEZ9lktTi-i!%$nzf12rZ@J;(4%@yj(ATLvdUeRV z_uriz7%|{PhJ*Em^2#1Bez+^JG@Q9IGxv4glSlx#Zn0_Hj8=vh0tW5Zh^`K|(j0tz z^Rmm~z1Hl!zr3~Z#d|}Sd7JFA&rW@tw_vSx*8Ed*tDcvemfEJI2`*i!*zQufn4dIb zNZ`KimTx-Q51KOPvqS5hbMCo^_I=TI;2nb?Keq!m5iY@1J=R7hydM#v_GIce5ze_` znR6`K^Y)zE)bHq?=#lOrx-ogLKlxn|^(!lh*|~Pa@QQ@Ut}%=HnvBk`u2^#V{M4|? zKSf6@3?5c1y0Xtf^dMwQ#t1FvqzLbTib5Y>=iKV7G37>2D%|#pKF+Jy-(jiR_x`Ue z`(F6zueA&Bbvz|>zi_gH{lwkxz+mMU%93u5nmVXv)U#*frg?Px?&;0>jljd~{~;kbGe^gv;1AqBD*OD_xtX!$7~C<1r#rs9Wj+GwK?_A;JjLqv(uu4}JMcXTLq)aZ%WrjnSvpZ!T^X?((o})Shc^{4I;*TC%z_GAqD z*o^F9Qv;(3j3=3mz&1Y{i9LpCK)Njti5k(cG1yMffN>Yrj+ojgafu;=)Nv09OeNLH z1gL#EZqg=MFk#AKOfBFBSq8La3>uch!t~d%`chxVw&WV&ZCnY$w-3Y(wujlnd`$g{SDF2OR0uLpPNGA-)L5TD-U+%riTl!vWCmuXp7 zh7V;@f02|y`Fd1MP?mOG8GR@tpm6R0trE)gBG%Nu-mv2VBNZV&)DgZXf#0RzOG~Bf z*n@OHTy?549LgQFv?BR!NF<{eI1j!aQtQ*W=b| zB~hjx)zsg>q8Ii!+GEvWA3lD;j-E>2Q3HROR8j4NT&R^EE|OCBFp^MS?}J=iic4|_ zxwe+kT&Oqnu{thXGDR*v#ihpJQUkB3#Tm1vY66df*# zpBW#KAn2PEE{vsgAe&E*t{M9?#|Q9I`o#XI<7h(%BXlz%uL*EHIo8BCGZw1HyHlPf)8p5 zeDF%&{{m&OoGLZn0wM5?8h&;sfbWj+5B=b`#?48_M4j5VhLX{b1bjRaa*jHZe)Ny@ zQS$%AAaxW-7r|GhlPNu7qzg9T3jPm}dikhB6IiGs<9+Co>SP%8#U6f4j&;XRZX+n2 zkkat=y&_y2kdJ)``%hElTOb&A9!Tn>4@kqW{inhAgV+Mi)2G|NQGZ($Tqpnk03(PX Ag#Z8m literal 0 HcmV?d00001 diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index 275841896889..bc6dcab88655 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ 
b/test/Table_Tests/src/Excel_Spec.enso @@ -1,6 +1,9 @@ from Standard.Base import all +import Standard.Table.Io.File_Read +import Standard.Table.Io.File_Format from Standard.Table.Io.Excel import Excel_Range + import Standard.Test spec = @@ -92,4 +95,21 @@ spec = Excel_Range.for_rows "Test" 55 20000000 . should_fail_with Illegal_Argument_Error Excel_Range.for_rows "Test" 55 0 . should_fail_with Illegal_Argument_Error + test_sheet = Enso_Project.data / "TestSheet.xlsx" + test_path = test_sheet.path + + Test.group "Read XLSX Files" <| + Test.specify "should let you read the sheet names" <| + sheet_names = ["Sheet1", "Another"] + test_sheet.read . should_equal sheet_names + File.read test_sheet . should_equal sheet_names + File.read test_path . should_equal sheet_names + + Test.specify "should let you read the sheet names" <| + sheet_names = ["Sheet1", "Another"] + test_sheet.read . should_equal sheet_names + File.read test_sheet . should_equal sheet_names + File.read test_path . should_equal sheet_names + + main = Test.Suite.run_main here.spec From 48bb969b51c96df61992b954c3985af8cfc17613 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 3 May 2022 17:12:18 +0100 Subject: [PATCH 12/28] XLS support Sheet and Range names tests --- .../Table/0.0.0-dev/src/Io/Excel.enso | 19 +++---- .../Table/0.0.0-dev/src/Io/File_Format.enso | 12 +++-- .../org/enso/table/format/xlsx/Reader.java | 53 +++++++++++-------- test/Table_Tests/src/Excel_Spec.enso | 38 ++++++++----- 4 files changed, 76 insertions(+), 46 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 161d392fe72c..906f1f988642 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -1,4 +1,4 @@ -from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Error, Any, Error, Panic, File, Vector +from Standard.Base 
import Integer, Text, Nothing, Boolean, Illegal_Argument_Error, Any, Error, Panic, File, Vector, False from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior import Standard.Table.Data.Table @@ -6,6 +6,7 @@ import Standard.Table.Data.Table polyglot java import org.enso.table.format.xlsx.Range as Java_Range polyglot java import org.enso.table.format.xlsx.Reader polyglot java import java.lang.IllegalArgumentException +polyglot java import java.io.IOException ## Specified the part of an Excel Workbook to Read @@ -131,21 +132,21 @@ validate : Boolean -> Text -> Any validate ~validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) -read_excel : File -> Excel_Section -> Problem_Behavior -> (Table | Vector) -read_excel file section on_problems = +read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) +read_excel file section on_problems xls_format=False = reader stream = case section of - Sheet_Names -> Vector.Vector (Reader.readSheetNames stream) - Range_Names -> Vector.Vector (Reader.readRangeNames stream) + Sheet_Names -> Vector.Vector (Reader.readSheetNames stream xls_format) + Range_Names -> Vector.Vector (Reader.readRangeNames stream xls_format) Sheet sheet skip_rows row_limit -> Table.Table <| - if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit) else - Reader.readSheetByName stream sheet skip_rows row_limit + if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit xls_format) else + Reader.readSheetByName stream sheet skip_rows row_limit xls_format Range address skip_rows row_limit -> Table.Table <| range = (if address.is_an Excel_Range then address else Excel_Range.from_address address) - Reader.readRange stream range.java_range skip_rows row_limit + Reader.readRange stream range.java_range skip_rows row_limit xls_format file_failure caught_panic = 
File.wrap_io_exception file caught_panic.payload.cause.getCause - Panic.catch IOException file_failure <| + Panic.catch IOException handler=file_failure <| file.with_input_stream [File.Option.Read] stream-> stream.with_java_stream reader diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index 67f5a2126ca1..f68da2b16c6e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -28,6 +28,9 @@ type Auto if ".csv".equals_ignore_case extension then Ref.put output (File_Format.Delimited ',') if ".tsv".equals_ignore_case extension then Ref.put output (File_Format.Delimited '\t') if ".xlsx".equals_ignore_case extension then Ref.put output File_Format.Excel + if ".xlsm".equals_ignore_case extension then Ref.put output File_Format.Excel + if ".xls".equals_ignore_case extension then Ref.put output File_Format.Excel + if ".xlt".equals_ignore_case extension then Ref.put output File_Format.Excel Ref.get output @@ -115,12 +118,15 @@ type Delimited ## A setting to infer the default behaviour of some option. 
type Infer - ## Read the file to a `Table` from an Excel file type Excel - type Excel (section:Excel_Section=Excel_Module.Sheet_Names) + type Excel (section:Excel_Section=Excel_Module.Sheet_Names) (xls_format:(True|False|Infer)=Infer) ## Implements the `File.read` for this `File_Format` read : File -> Problem_Behavior -> Any read file on_problems = - Excel_Module.read_excel file this.section on_problems + format = if this.xls_format != Infer then this.xls_format else + extension = file.extension + (extension.equals_ignore_case ".xls") || (extension.equals_ignore_case ".xlt") + + Excel_Module.read_excel file this.section on_problems format diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 03beeb0d00da..596d8d756e76 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -101,8 +101,7 @@ private static Table read_table( String cellRange, boolean hasHeaders, String unnamedColumnPrefix, - Function mkDate) - throws IOException { + Function mkDate) { Sheet sheet = null; if (sheetIdx instanceof Long) { sheet = workbook.getSheetAt(((Long) sheetIdx).intValue()); @@ -189,7 +188,7 @@ private static Table read_table( if (value instanceof LocalDate) { value = mkDate.apply((LocalDate) value); } - builders.get(j - minCol).append(getCellValue(cell)); + builders.get(j - minCol).append(value); } } } @@ -301,8 +300,8 @@ private static Object getCellValue(Cell cell) { return null; } - public static String[] readSheetNames(InputStream stream) throws IOException { - XSSFWorkbook workbook = new XSSFWorkbook(stream); + public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException { + Workbook workbook = xls_format ? 
new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetCount = workbook.getNumberOfSheets(); var output = new String[sheetCount]; for (int i = 0; i < sheetCount; i++) { @@ -311,15 +310,19 @@ public static String[] readSheetNames(InputStream stream) throws IOException { return output; } - public static String[] readRangeNames(InputStream stream) throws IOException { - Workbook workbook = new XSSFWorkbook(stream); + public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException { + Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new); } public static Table readSheetByName( - InputStream stream, String sheetName, Integer skip_rows, Integer row_limit) + InputStream stream, + String sheetName, + Integer skip_rows, + Integer row_limit, + boolean xls_format) throws IOException, IllegalArgumentException { - Workbook workbook = new XSSFWorkbook(stream); + Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetIndex = getSheetIndex(workbook, sheetName); if (sheetIndex == -1) { @@ -327,13 +330,17 @@ public static Table readSheetByName( } Sheet sheet = workbook.getSheetAt(sheetIndex); - return readSheetToTable(sheet, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); + return readSheetToTable( + sheet, + null, + skip_rows == null ? 0 : skip_rows, + row_limit == null ? Integer.MAX_VALUE : row_limit); } public static Table readSheetByIndex( - InputStream stream, int index, Integer skip_rows, Integer row_limit) + InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException, IllegalArgumentException { - XSSFWorkbook workbook = new XSSFWorkbook(stream); + Workbook workbook = xls_format ? 
new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetCount = workbook.getNumberOfSheets(); if (index < 1 || index > sheetCount) { @@ -342,19 +349,17 @@ public static Table readSheetByIndex( } Sheet sheet = workbook.getSheetAt(index - 1); - return readSheetToTable(sheet, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); + return readSheetToTable( + sheet, + null, + skip_rows == null ? 0 : skip_rows, + row_limit == null ? Integer.MAX_VALUE : row_limit); } public static Table readRange( - InputStream stream, String nameOrAddress, Integer skip_rows, Integer row_limit) + InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException { - XSSFWorkbook workbook = new XSSFWorkbook(stream); - - String refersTo = getRefersTo(workbook, nameOrAddress); - if (refersTo == null) { - refersTo = nameOrAddress; - } - Range range = new Range(refersTo); + Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetIndex = getSheetIndex(workbook, range.getSheetName()); if (sheetIndex == -1) { @@ -362,6 +367,10 @@ public static Table readRange( } Sheet sheet = workbook.getSheetAt(sheetIndex); - return readSheetToTable(sheet, range, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); + return readSheetToTable( + sheet, + range, + skip_rows == null ? 0 : skip_rows, + row_limit == null ? 
Integer.MAX_VALUE : row_limit); } } diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index bc6dcab88655..41bc69cbc7b7 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -3,6 +3,7 @@ from Standard.Base import all import Standard.Table.Io.File_Read import Standard.Table.Io.File_Format from Standard.Table.Io.Excel import Excel_Range +from Standard.Table.Io.Excel import Sheet_Names, Range_Names, Sheet, Range import Standard.Test @@ -95,21 +96,34 @@ spec = Excel_Range.for_rows "Test" 55 20000000 . should_fail_with Illegal_Argument_Error Excel_Range.for_rows "Test" 55 0 . should_fail_with Illegal_Argument_Error - test_sheet = Enso_Project.data / "TestSheet.xlsx" - test_path = test_sheet.path + xlsx_sheet = Enso_Project.data / "TestSheet.xlsx" + xlsx_path = xlsx_sheet.path - Test.group "Read XLSX Files" <| - Test.specify "should let you read the sheet names" <| - sheet_names = ["Sheet1", "Another"] - test_sheet.read . should_equal sheet_names - File.read test_sheet . should_equal sheet_names - File.read test_path . should_equal sheet_names + xls_sheet = Enso_Project.data / "TestSheetOld.xls" + xls_path = xls_sheet.path + + col_a = ["Test", "Here", "Is", "Data"] + col_a = [1, 2, 3, 4] + col_a = [] - Test.specify "should let you read the sheet names" <| + Test.group "Read XLSX / XLS Files" <| + Test.specify "should let you read the sheet names with File_Format.Auto" <| + sheet_names = ["Sheet1", "Another"] + xlsx_sheet.read . should_equal sheet_names + File.read xlsx_sheet . should_equal sheet_names + File.read xlsx_path . should_equal sheet_names + xls_sheet.read . should_equal sheet_names + File.read xls_sheet . should_equal sheet_names + File.read xls_path . should_equal sheet_names + + Test.specify "should let you read the sheet names with File_Format.Excel" <| sheet_names = ["Sheet1", "Another"] - test_sheet.read . should_equal sheet_names - File.read test_sheet . 
should_equal sheet_names - File.read test_path . should_equal sheet_names + xlsx_sheet.read File_Format.Excel . should_equal sheet_names + xls_sheet.read File_Format.Excel . should_equal sheet_names + Test.specify "should let you read the range names" <| + range_names = ["myData"] + xlsx_sheet.read (File_Format.Excel Range_Names) . should_equal range_names + xls_sheet.read (File_Format.Excel Range_Names) . should_equal range_names main = Test.Suite.run_main here.spec From b636c2601fa460ceffb4a961ba1d70e119785285 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 3 May 2022 18:25:27 +0100 Subject: [PATCH 13/28] Sheet tests and row count fix --- .../org/enso/table/format/xlsx/Reader.java | 5 +- test/Table_Tests/data/TestSheet.xlsx | Bin 9862 -> 9830 bytes test/Table_Tests/src/Excel_Spec.enso | 51 +++++++++++++++++- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 596d8d756e76..14f67194a669 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -26,7 +26,6 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; -import java.util.stream.Stream; /** A table reader for MS Excel files. */ public class Reader { @@ -207,7 +206,7 @@ private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, in int endRow = Math.min( range == null || range.isWholeColumn() ? lastRow : range.getBottomRow(), - startRow + rowCount - 1); + rowCount == Integer.MAX_VALUE ? Integer.MAX_VALUE : startRow + rowCount - 1); // Columns int startCol = (range == null || range.isWholeRow() ? 
1 : range.getLeftColumn()); @@ -249,7 +248,7 @@ private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, in .mapToObj( idx -> new Column( - CellReference.convertNumToColString(startCol + idx), + CellReference.convertNumToColString(startCol + idx - 1), builders.get(idx).seal())) .toArray(Column[]::new); diff --git a/test/Table_Tests/data/TestSheet.xlsx b/test/Table_Tests/data/TestSheet.xlsx index d0608e7b8a38427c554178badf056f31d5d43430..8a80b2791617494d1a23eb6ea93428c8921562e8 100644 GIT binary patch delta 2339 zcmV+;3EcLEP3BCn&<2064IY2A0{{Rk2><{O0001ZY%h0ja%*C5Z)+}iZEUPnZExZ@ z5dMDB{fAuCuW91EL88Knyy+^ntE%PNFG!V}n8s=n=WM4?Rr}v}90<^5wc@&mz>e+V z@iQ~eOfJ5x>q>hiTu|0bbaZa%8fkJ?(B^rf|1-~wv91Y;n*x7VtRWNqjR^hA^v^#o zR*b(qG4`SXKqDr4DP*E-lhUA)@ zyi%3J9V_mCl9j*1^7mlK`fW4>((1JmLhwF`=IyxI`t)K+D{{Y)u7=z8E3Q?hDqX9v zkZD0BDJJ>|mTW~nZ1Ap)I$FW0?b)`co70_~Z@5;FCGLMJIR_yR@K71PA8af^7_&+e z-e5^$)=1Fd*00~y-1Gvl&DQ0)2t6FycJKPvQp4QtNm%(c^G&OIjut zX(QNz7Gxp8&Ba_P_Hho6Vo_j;7Y9Pc$5rtkq>6JDDRW11^Za8lfGZCNIo?RF!SiKu z1u411uaJKrkU_Dn-%H2Y^OQ?e3!m8hk* zexOakRukPoj^lw_Z~G13v0-D?8$8fLmLRpja(91we-rv#0&C=rlp$1vVmHy>(=lKr zQDBUal^CvL&5S5Y6C)WT%XcF?b*#8&WggJ#dn(ZDXRYb$;7;{f1kGE`eV}!Xhw2V~ zSs>-tq4lg`vLyV#9QxbN-0t%}+yeDM8&arpgF6pa+nejR1Z1aAHw^wdo1H=I&*C%W zT$q0c)}Wq`NI9-@C_gpzd;?_Jfx4Nj<&}{A49cevCFEMOkrlXxl{%i`jsx2myN+$d zZepk2C{5DHQ&Lf{rttr0(0u`h!#hv0EHRgJj`J6IQ{Iwggav53kE;oM56MQ}IIK0TohQ4Qr|VaBy+qGO5)VZ_GFd&%TH6b$FbS}n_$^N>rh>zUx#YUw!6E{m0wo zErT&e)-hGFRx8Ny5iEX~-QG<1+H7nC;E*RMo8!j0E+fS91|+kg?m+pQS{uon-3x~>I>QuD0@M zJYpqU;f@0oBgwO;wbF*IT3`2pCXAzl@gw$g^pj>dzsMv7H`?mjg`P}KnlH17 zVNM*l9Dl{vTq} zf;l+IaRc9|>|-URUv5_RhUI@B#AY{a{9LZu%eJO}^Cnwm^Q?)!W#u|5=6N2i)=3lP zSu$I#=c{sFmjCE-(MsImu`A-BtLfLYY6{Ahi)ns-OE~&>F&W2kW9@ESc)>IY3LX@Q zaWDwS%K_G)R81S~%>S883=w2E@qK*NpQi!-;YbS|->dM`9>0U!_5l_DmLe{&v2il0 z_S}FMIFgt;&XN5Ov!fBD3tgIBIs0D+T+3iijYsxDPFS@Eh+X9T|NXk%Nr)Y0&O))ln50aU{Ig}H(m6pu9 zf1}>vn(+hM0gecnV+ocCW*P2)emxS=87cUPtdF|v0fJ*FL4vXdhe3#cycV=He-8A@ 
z#3>8g)~?ez?vyR~Adc>nDMn+{G(i*hFKASx?OQVSt2;)8kntg_Qa%H z&+4cSpi5{IJkh#6mdx0VTluyC@z7q|Mz1Z7-j^Ak0O7fKbSQIC-s8trzD8L@qR1zA zezeF#LgSc{`Dd5l*L)XuxRB=@{}N|DiSuwy!+ScLO^6pjmf))|l6?b{p$ZhU${=b3 z39k(vf3yPt04tL{Bu)XNlZ+%S90Qk@1f~K20NDiq02lxO00000000000002p43qr~ z8k6rN5FCv=OomOc&<20%SYr~n0{{Ru2><{O0001ZY%h0ja%*C5Z)+}iZEUPnZEvGE z5dMDB{Ra;9D+TioO0o(8bd}mw)#lnSkt!FQVihoFOp~hG|Gr}gX}Vdha@~s(gYC)V zXJ(!mPrhucQh6b~rL3N+=-f~hQfDlu_0v@SXPIigsYKmV9) z7=M0b>{$VTx}B~__BaNoA=x@7 zFI47m&x*UBWaaL$+;=c!{5Bc_Y4utOA$XrebN1X!bv9YklH3i_RdCaM#g)udsVXII zMUqoN@~P^QALM@!f>9n*AFeYThLHCJ-7#$A6YmLTL29xB6igTWGnF)sz- zH5O#SY5_Xj`SrV+n@s?FQ7}-razp;@C?`-@pfB(hMx2HCvAxEkP&!^t)kV0vAq{I+ zN!_v)&B;oDo2#W%?86dX+f|MQUL6P(A6LbHkSfk(r1U+>;r+*809PK4a=aE?f%l8} z5>j%DUm$-$AcK5YzZZ}JWIfaw57EP0l*}VDNo*}hQJ~qL=V^iEA#LHip5vv7>7(cy zIK$mAW4IH=F8308s@jm~qpd5v9e0osb~Jyd@n-Bkntc8+jC zs=g(@WjS#aXdW_R&9;oW7DY*{#Xd4z8zN;Hi=Ll;K(OzwK(v1HhkYjndcqe-kV^Xd<$I^z%@T>27tu zxE>b9q#fQ};Y}>#zWevD3~#~vlALIq-koT;_(@T$=Fc1LXd!_yd z%Xcs)EKN|ziUdX7o~l*byJLo6#ShkYUaI=vYlxCvDVJy^5Caz9%-~`Ed{Pn}z&c@c ziD{f*1e%*h=xvFA|5`^=jGSk>VbW+Q@fjR`UVQyBKN`Dp9e_tc(5}Rt_kBT#;~gmG zV$*{TE3L7L`EYB0x5V`pSTnw)BuSGbCyEJ;FG67-F-+SE4z=NX1=?Q_3zGRzzw1Qr zuA1VHaEjU8zK^(3eIPbM3V$9^j1(_kw%S;>k)iJs%@{`~8;&%blWUss`CTR@xHZnS zJ`OUu)O_FmjLZn*H|f6rhZvd>3kNZ5;5(Imt|a<&v(k5eZ1PDgzhT4Yvc+CBCH|Y| z>vf&xX_S0h(kPp)rcsqGt7uu%DWzGRXI1qNFXoL1J3MrSEVL!AXi}``1e3*loS;7f zj_yN@hJoA|vl|v(LZT!X4kYklG!P4y1FS&G6934Y-^ug4MoX;A|$ z+e`myj^9BUY&#$Rmn_agYr||5&AA50!%&9Q@lfY~0JD}3F#!oF&7~jF0ssJPlh6+! zf0A2^+b|G?-wXW@LhrVGYluyprHQ+A-%4-qiad@*ba5r+M(Ka=*df`4yo3cCMq}wa zbF8y?I%!1?;Jmb^rfHl|0!G+Ynr%&g{d|t*l=#3+%at`y(=&MbwEXyCakOsddw@WY zV0=yc5Qd5|FM812$94dNUpnhF54dz&f93}Vyqykd#j+%sG0ml+7eeJ8Vc4#d0vap! 
z8ceuA94H=ee&5T%UujxAg45jX_CqAB9uTpSN`~_Ur9_MBWoxYC8->14X~6|KIeapF z^Q+9%zf;muIO}Z}VkEQ6%=h+7Rx&PbqTt~$`&!g2kX`(z!W~=plRnE)uA6+h5i8nmDA`ZHm zeom{VplrFA^7C85(Z8ul;F}+yKmocraROS0uzR>3D2Th=N^aYRr6ir1o4`y9P)*Wm&M)-femRV&C*3l=Sl z+nbDQ%4-A9rq*Cw2{!N~WhvJ=Iyk2(#+DyIG8=h^^5U*Gk~x2WG<&QWKe9dGgpfIw z;Fw^J;SLz;k%%rx!AGP%nW_f}j-dhx%2^yoG5+y7Fw*`x&?^(CEM2R8r%T)^Tkt^~ z-Dg{l#-?eaChcSR{4svtJiYW)!%}s-;(&@waLNrZSDWIINxz=eNgcsd&}Mj|b$hIs zbsN9(T?yj8y|!OPTRgolGduwz@bTzS=39A7A6DBnDiV?;A(@5AVjB~hrj*P-x&*)G zyST%JyyW0lbqvCL;!_BLDyZ08xxstpET3 diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index 41bc69cbc7b7..fb84c39d9837 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -1,5 +1,7 @@ from Standard.Base import all +import Standard.Base.Data.Time.Date + import Standard.Table.Io.File_Read import Standard.Table.Io.File_Format from Standard.Table.Io.Excel import Excel_Range @@ -103,8 +105,20 @@ spec = xls_path = xls_sheet.path col_a = ["Test", "Here", "Is", "Data"] - col_a = [1, 2, 3, 4] - col_a = [] + col_b = [1, 2, 3, 4] + # col_c = [Date.new 2022 06 12, Date.new 2022 10 20, Date.new 2022 07 30, Date.new 2022 10 15] + + check_column col expected = + start = col.length - expected.length + 0.up_to start . map i->(col.at i . should_equal Nothing) + start.up_to col.length . map i->(col.at i . should_equal (expected.at (i - start))) + + check_table table = + check_column (table.at "A") col_a + check_column (table.at "B") col_b + ## ToDo [JD]: Can't check Dates at present as not being handled correctly. Coming as as Polyglot array + https://www.pivotaltracker.com/story/show/181755990 + # check_column (table.at "C") col_c Test.group "Read XLSX / XLS Files" <| Test.specify "should let you read the sheet names with File_Format.Auto" <| @@ -126,4 +140,37 @@ spec = xlsx_sheet.read (File_Format.Excel Range_Names) . should_equal range_names xls_sheet.read (File_Format.Excel Range_Names) . 
should_equal range_names + Test.specify "should let you read by sheet index" <| + table = xlsx_sheet.read (File_Format.Excel (Sheet 1)) + check_table table + + table_2 = xlsx_sheet.read (File_Format.Excel (Sheet 1 (table.length - col_a.length))) + table_2.length . should_equal col_a.length + check_table <| table_2 + + Test.specify "should let you read by sheet name" <| + table = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1")) + check_table table + + table_2 = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1" (table.length - col_a.length))) + table_2.length . should_equal col_a.length + check_table <| table_2 + + Test.specify "should let you read XLS by sheet index" <| + table = xls_sheet.read (File_Format.Excel (Sheet 1)) + check_table table + + table_2 = xls_sheet.read (File_Format.Excel (Sheet 1 (table.length - col_a.length))) + table_2.length . should_equal col_a.length + check_table <| table_2 + + Test.specify "should let you read XLS by sheet name" <| + table = xls_sheet.read (File_Format.Excel (Sheet "Sheet1")) + check_table table + + table_2 = xls_sheet.read (File_Format.Excel (Sheet "Sheet1" (table.length - col_a.length))) + table_2.length . should_equal col_a.length + check_table <| table_2 + + main = Test.Suite.run_main here.spec From d184061ff4712071e56f1634d08666fc2ab49717 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 4 May 2022 10:43:05 +0100 Subject: [PATCH 14/28] Java tidy and restructure to allow range names and addresses. 
--- .../Table/0.0.0-dev/src/Io/Excel.enso | 15 ++- .../org/enso/table/format/xlsx/Range.java | 104 ++++++++++-------- .../org/enso/table/format/xlsx/Reader.java | 48 +++++--- 3 files changed, 99 insertions(+), 68 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 906f1f988642..dff366ae688a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -2,6 +2,7 @@ from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Erro from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior import Standard.Table.Data.Table +from Standard.Table.Error as Error_Module import Invalid_Location polyglot java import org.enso.table.format.xlsx.Range as Java_Range polyglot java import org.enso.table.format.xlsx.Reader @@ -133,7 +134,7 @@ validate ~validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) -read_excel file section on_problems xls_format=False = +read_excel file section _ xls_format=False = reader stream = case section of Sheet_Names -> Vector.Vector (Reader.readSheetNames stream xls_format) Range_Names -> Vector.Vector (Reader.readRangeNames stream xls_format) @@ -143,10 +144,12 @@ read_excel file section on_problems xls_format=False = Reader.readSheetByName stream sheet skip_rows row_limit xls_format Range address skip_rows row_limit -> Table.Table <| - range = (if address.is_an Excel_Range then address else Excel_Range.from_address address) - Reader.readRange stream range.java_range skip_rows row_limit xls_format + if address.is_an Excel_Range then Reader.readRange stream range.java_range skip_rows row_limit xls_format else + Reader.readRangeByName stream range skip_rows row_limit 
xls_format file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause - Panic.catch IOException handler=file_failure <| - file.with_input_stream [File.Option.Read] stream-> - stream.with_java_stream reader + bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) + + Panic.catch IllegalArgumentException handler= <| + Panic.catch IOException handler=file_failure <| + file.with_input_stream [File.Option.Read] stream->stream.with_java_stream reader diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index 28be7d9f869b..cc7c13184108 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -10,7 +10,7 @@ public class Range { private static final Pattern FULL_ADDRESS = Pattern.compile("^('.+'|[^'!]+)!(.+)$"); - private static String[] parseFullAddress(String fullAddress) { + private static String[] parseFullAddress(String fullAddress) throws IllegalArgumentException { if (fullAddress == null) { throw new IllegalArgumentException("fullAddress cannot be NULL."); } @@ -37,7 +37,7 @@ private static String[] parseFullAddress(String fullAddress) { private static final Pattern RANGE_RC = Pattern.compile("^(" + ADDRESS_RC + ")(?::(" + ADDRESS_RC + "))?$"); - private static int[] parseRange(String range) { + private static int[] parseRange(String range) throws IllegalArgumentException { for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) { Optional parsed = parseRange(range, pattern, pattern == RANGE_RC ? 
Range::parseRC : Range::parseA1); @@ -72,7 +72,9 @@ private static Optional parseRange( }); } - private static boolean isLetter(char c) { return c >= 'A' && c <= 'Z'; } + private static boolean isLetter(char c) { + return c >= 'A' && c <= 'Z'; + } private static boolean isDigit(char c) { return c >= '0' && c <= '9'; @@ -86,70 +88,82 @@ private static int skipDollar(CharSequence address, int index) { } private static int[] parseA1(CharSequence address) { - int col = 0; - - int index = skipDollar(address, 0); - while (index < address.length() && isLetter(address.charAt(index))) { - col = 26 * col + (address.charAt(index) - 'A' + 1); - index++; - } - - index = skipDollar(address, index); - int row = index < address.length() ? Integer.parseInt(address, index, address.length(), 10) : 0; - return new int[] {row, col}; + ParsedInteger col = parseColumn(address); + ParsedInteger row = parseInteger(address, skipDollar(address, col.index)); + return new int[] {row.value, col.value}; } - private static int[] parseRC(CharSequence address) { + private static int[] parseRC(CharSequence address) throws IllegalArgumentException { int index = 0; int row = 0; if (index < address.length() && address.charAt(index) == 'R') { - // Parse Row - int endIndex = index + 1; - while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { - endIndex++; + ParsedInteger parsed = parseInteger(address, index + 1); + if (parsed.value == 0) { + throw new IllegalArgumentException(address + " not an absolute R1C1 style addresses."); } - if (endIndex == index + 1) { - throw new IllegalArgumentException("R1C1 style addresses must be absolute."); - } - - row = Integer.parseInt(address, index + 1, endIndex, 10); - index = endIndex; + row = parsed.value; + index = parsed.index; } int col = 0; if (index < address.length() && address.charAt(index) == 'C') { - // Parse Row - int endIndex = index + 1; - while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { - endIndex++; - } - - 
if (endIndex == index + 1) { - throw new IllegalArgumentException("R1C1 style addresses must be absolute."); + ParsedInteger parsed = parseInteger(address, index + 1); + if (parsed.value == 0) { + throw new IllegalArgumentException(address + " not an absolute R1C1 style addresses."); } - col = Integer.parseInt(address, index + 1, endIndex, 10); + col = parsed.value; } return new int[] {row, col}; } - public static int parseA1Column(CharSequence column) { + /** + * Convert an Excel Column Name (e.g. DCR) into the index (1-based) + * + * @param column name + * @return Column index (A=1 ...) + */ + public static int parseA1Column(CharSequence column) throws IllegalArgumentException { + ParsedInteger parsed = parseColumn(column); + if (parsed.index != column.length() || parsed.value == 0) { + throw new IllegalArgumentException(column + " is not a valid Excel Column Name."); + } + + return parsed.value; + } + + private static class ParsedInteger { + public final int index; + public final int value; + + public ParsedInteger(int index, int value) { + this.index = index; + this.value = value; + } + } + + private static ParsedInteger parseInteger(CharSequence address, int index) { + int endIndex = index; + while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { + endIndex++; + } + return new ParsedInteger(endIndex, Integer.parseInt(address, index + 1, endIndex, 10)); + } + + private static ParsedInteger parseColumn(CharSequence column) { int col = 0; - int index = 0; + int index = skipDollar(column, 0); + while (index < column.length() && isLetter(column.charAt(index))) { col = 26 * col + (column.charAt(index) - 'A' + 1); index++; } - if (index != column.length()) { - return -1; - } - - return col; + return new ParsedInteger(index, col); } private final String sheetName; @@ -158,7 +172,7 @@ public static int parseA1Column(CharSequence column) { private final int topRow; private final int bottomRow; - public Range(String fullAddress) { + public 
Range(String fullAddress) throws IllegalArgumentException { String[] sheetAndRange = parseFullAddress(fullAddress); this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); @@ -215,8 +229,10 @@ public String getAddress() { (isWholeRow() ? "" : CellReference.convertNumToColString(getLeftColumn() - 1)) + (isWholeColumn() ? "" : Integer.toString(getTopRow())); if (getLeftColumn() != getRightColumn() || getTopRow() != getBottomRow()) { - range += ":" + (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1)) - + (isWholeColumn() ? "" : Integer.toString(getBottomRow())); + range += + ":" + + (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1)) + + (isWholeColumn() ? "" : Integer.toString(getBottomRow())); } return sheetNameEscaped + "!" + range; diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 14f67194a669..400a77ce9105 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -42,7 +42,7 @@ public class Reader { * @param hasHeaders specifies whether the first non-empty row of the sheet should be used for * column names. * @param unnamedColumnPrefix specifies the prefix to use for missing columns. - * @param mkDate a function converting java-based dates into a format understandable by the + * @param mkDate a function converting Java-based dates into a format understandable by the * caller. * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. @@ -77,7 +77,7 @@ public static Table read_xlsx( * @param hasHeaders specifies whether the first non-empty row of the sheet should be used for * column names. * @param unnamedColumnPrefix specifies the prefix to use for missing columns. 
- * @param mkDate a function converting java-based dates into a format understandable by the + * @param mkDate a function converting Java-based dates into a format understandable by the * caller. * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. @@ -198,7 +198,10 @@ private static Table read_table( return new Table(columns); } - private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, int rowCount) { + private static Table readSheetToTable( + Workbook workbook, int sheetIndex, Range range, int skipRows, int rowCount) { + Sheet sheet = workbook.getSheetAt(sheetIndex); + // Row Range int firstRow = sheet.getFirstRowNum() + 1; int lastRow = sheet.getLastRowNum() + 1; @@ -255,15 +258,6 @@ private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, in return new Table(columns); } - private static String getRefersTo(Workbook workbook, String rangeName) { - for (Name name : workbook.getAllNames()) { - if (name.getNameName().equalsIgnoreCase(rangeName)) { - return name.getRefersToFormula(); - } - } - return null; - } - private static int getSheetIndex(Workbook workbook, String sheetName) { int sheetCount = workbook.getNumberOfSheets(); for (int i = 0; i < sheetCount; i++) { @@ -328,9 +322,9 @@ public static Table readSheetByName( throw new IllegalArgumentException("Unknown sheet '" + sheetName + "'."); } - Sheet sheet = workbook.getSheetAt(sheetIndex); return readSheetToTable( - sheet, + workbook, + sheetIndex, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); @@ -347,27 +341,45 @@ public static Table readSheetByIndex( "Sheet index is not in valid range (1 to " + sheetCount + " inclusive)."); } - Sheet sheet = workbook.getSheetAt(index - 1); return readSheetToTable( - sheet, + workbook, + index - 1, null, skip_rows == null ? 0 : skip_rows, row_limit == null ? 
Integer.MAX_VALUE : row_limit); } + public static Table readRangeByName( + InputStream stream, + String rangeNameOrAddress, + Integer skip_rows, + Integer row_limit, + boolean xls_format) + throws IOException { + Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + + Name name = workbook.getName(rangeNameOrAddress); + Range range = new Range(name == null ? rangeNameOrAddress : name.getRefersToFormula()); + return readRange(workbook, range, skip_rows, row_limit); + } + public static Table readRange( InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException { Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + return readRange(workbook, range, skip_rows, row_limit); + } + private static Table readRange( + Workbook workbook, Range range, Integer skip_rows, Integer row_limit) { int sheetIndex = getSheetIndex(workbook, range.getSheetName()); if (sheetIndex == -1) { throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'."); } - Sheet sheet = workbook.getSheetAt(sheetIndex); return readSheetToTable( - sheet, + workbook, + sheetIndex, range, skip_rows == null ? 0 : skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit); From 7f64b598bfdcaee03016ad038a84bd96215ac84c Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 4 May 2022 12:00:10 +0100 Subject: [PATCH 15/28] Java tidy and restructure to allow range names and addresses. 
--- .../lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso | 12 +++++++----- .../main/java/org/enso/table/format/xlsx/Range.java | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index dff366ae688a..3e4286cd5a19 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -144,12 +144,14 @@ read_excel file section _ xls_format=False = Reader.readSheetByName stream sheet skip_rows row_limit xls_format Range address skip_rows row_limit -> Table.Table <| - if address.is_an Excel_Range then Reader.readRange stream range.java_range skip_rows row_limit xls_format else - Reader.readRangeByName stream range skip_rows row_limit xls_format + if address.is_an Excel_Range then Reader.readRange stream address.java_range skip_rows row_limit xls_format else + Reader.readRangeByName stream address skip_rows row_limit xls_format file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause + catch_file_failure function = Panic.catch IllegalArgumentException handler=file_failure function + bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) + catch_bad_argument function = Panic.catch IOException handler=bad_argument function - Panic.catch IllegalArgumentException handler= <| - Panic.catch IOException handler=file_failure <| - file.with_input_stream [File.Option.Read] stream->stream.with_java_stream reader + catch_file_failure <| catch_bad_argument <| + file.with_input_stream [File.Option.Read] stream->(stream.with_java_stream reader) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index cc7c13184108..2071bba4adb7 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ 
b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -150,7 +150,8 @@ private static ParsedInteger parseInteger(CharSequence address, int index) { while (endIndex < address.length() && isDigit(address.charAt(endIndex))) { endIndex++; } - return new ParsedInteger(endIndex, Integer.parseInt(address, index + 1, endIndex, 10)); + return new ParsedInteger( + endIndex, endIndex == index ? 0 : Integer.parseInt(address, index, endIndex, 10)); } private static ParsedInteger parseColumn(CharSequence column) { From 4e2dc26ca55c043f0a3247fecec180b3b5ddfdca Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 4 May 2022 13:34:58 +0100 Subject: [PATCH 16/28] Range tests and issues --- .../Table/0.0.0-dev/src/Io/Excel.enso | 4 +-- .../org/enso/table/format/xlsx/Reader.java | 35 ++++++++++++------- test/Table_Tests/src/Excel_Spec.enso | 29 +++++++++++++-- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 3e4286cd5a19..6ecdeb4a5901 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -148,10 +148,10 @@ read_excel file section _ xls_format=False = Reader.readRangeByName stream address skip_rows row_limit xls_format file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause - catch_file_failure function = Panic.catch IllegalArgumentException handler=file_failure function + catch_file_failure = Panic.catch IllegalArgumentException handler=file_failure bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) - catch_bad_argument function = Panic.catch IOException handler=bad_argument function + catch_bad_argument = Panic.catch IOException handler=bad_argument catch_file_failure <| catch_bad_argument <| file.with_input_stream [File.Option.Read] 
stream->(stream.with_java_stream reader) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 400a77ce9105..3d73ca1c404d 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -206,34 +206,28 @@ private static Table readSheetToTable( int firstRow = sheet.getFirstRowNum() + 1; int lastRow = sheet.getLastRowNum() + 1; int startRow = (range == null || range.isWholeColumn() ? 1 : range.getTopRow()) + skipRows; - int endRow = - Math.min( - range == null || range.isWholeColumn() ? lastRow : range.getBottomRow(), - rowCount == Integer.MAX_VALUE ? Integer.MAX_VALUE : startRow + rowCount - 1); + int endRow = range == null || range.isWholeColumn() ? lastRow : range.getBottomRow(); // Columns int startCol = (range == null || range.isWholeRow() ? 1 : range.getLeftColumn()); int endCol = (range == null || range.isWholeRow() ? -1 : range.getRightColumn()); + int size = Math.min(rowCount, endRow - startRow + 1); List builders = endCol == -1 ? new ArrayList<>() : IntStream.range(startCol, endCol + 1) - .mapToObj(i -> new InferredBuilder(endRow - startRow + 1)) + .mapToObj(i -> new InferredBuilder(size)) .collect(Collectors.toList()); // Read Cell Data - for (int row = startRow; row <= endRow; row++) { + int row = startRow; + while (row <= endRow && (row - startRow) < rowCount) { if (row < firstRow || row > lastRow) { builders.forEach(b -> b.append(null)); } else { Row currentRow = sheet.getRow(row - 1); - int currentEndCol = endCol == -1 ? 
currentRow.getLastCellNum() + 1 : endCol; - for (int i = builders.size(); i <= currentEndCol - startCol; i++) { - Builder builder = new InferredBuilder(endRow - startRow + 1); - builder.appendNulls(row - startRow); - builders.add(builder); - } + expandBuilders(builders, size, currentEndCol - startCol, row - startRow); int firstCol = currentRow.getFirstCellNum() + 1; int lastCol = currentRow.getLastCellNum(); @@ -243,6 +237,15 @@ private static Table readSheetToTable( builders.get(col - startCol).append(value); } } + + row++; + } + + // Special case for stopping before firstRow + if (endCol == -1 && (rowCount == 0 || row < firstRow)) { + Row currentRow = sheet.getRow(firstRow - 1); + int currentEndCol = currentRow.getLastCellNum() + 1; + expandBuilders(builders, size, currentEndCol - startCol, size); } // Create Table @@ -258,6 +261,14 @@ private static Table readSheetToTable( return new Table(columns); } + private static void expandBuilders(List builders, int size, int columnCount, int rows) { + for (int i = builders.size(); i <= columnCount; i++) { + Builder builder = new InferredBuilder(size); + builder.appendNulls(rows); + builders.add(builder); + } + } + private static int getSheetIndex(Workbook workbook, String sheetName) { int sheetCount = workbook.getNumberOfSheets(); for (int i = 0; i < sheetCount; i++) { diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index fb84c39d9837..f11e6a4d35ef 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -1,4 +1,4 @@ -from Standard.Base import all +from Standard.Base import Nothing, File, Illegal_Argument_Error import Standard.Base.Data.Time.Date @@ -168,9 +168,34 @@ spec = table = xls_sheet.read (File_Format.Excel (Sheet "Sheet1")) check_table table - table_2 = xls_sheet.read (File_Format.Excel (Sheet "Sheet1" (table.length - col_a.length))) + Test.specify "should let you read by range" <| + table = xlsx_sheet.read (File_Format.Excel (Range 
"Sheet1!A:C")) + check_table table + + table_2 = xlsx_sheet.read (File_Format.Excel (Range "Sheet1!A:C" (table.length - col_a.length))) + table_2.length . should_equal col_a.length + check_table <| table_2 + + check_table <| xlsx_sheet.read (File_Format.Excel (Range "Sheet1!10:13")) + check_table <| xlsx_sheet.read (File_Format.Excel (Range "Sheet1!A10:C13")) + + Test.specify "should let you read by range name" <| + table = xlsx_sheet.read (File_Format.Excel (Range "myData")) + table.length . should_equal col_a.length + check_table <| table + + Test.specify "should let you restrict number of rows read and skip rows" <| + table = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1")) + check_table table + + table_2 = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1" (table.length - col_a.length))) table_2.length . should_equal col_a.length check_table <| table_2 + table_3 = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1" (table.length - col_a.length) 2)) + table_3.length . should_equal 2 + + table_4 = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1" row_limit=6)) + table_4.length . 
should_equal 6 main = Test.Suite.run_main here.spec From b7eacda16f799b18d6f7399d58f8afdac9b843fe Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 4 May 2022 14:30:07 +0100 Subject: [PATCH 17/28] Test update --- test/Table_Tests/src/Delimited_Read_Spec.enso | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/test/Table_Tests/src/Delimited_Read_Spec.enso b/test/Table_Tests/src/Delimited_Read_Spec.enso index 5a89ac155a91..1779323044ac 100644 --- a/test/Table_Tests/src/Delimited_Read_Spec.enso +++ b/test/Table_Tests/src/Delimited_Read_Spec.enso @@ -1,12 +1,16 @@ from Standard.Base import all +import Standard.Base.Error.Problem_Behavior import Standard.Table import Standard.Table.Data.Column from Standard.Table.Error import all + +import Standard.Table.Io.File_Read import Standard.Table.Io.File_Format -import Standard.Base.Error.Problem_Behavior + import Standard.Test import Standard.Test.Problems + import project.Util spec = @@ -16,7 +20,7 @@ spec = c_2 = ["b", ['2', Nothing, '8', '11']] c_3 = ["c", [Nothing, '6', '9', '12']] expected_table = Table.new [c_1, c_2, c_3] - simple_empty = (File_Format.Delimited "," headers=True).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True) simple_empty.should_equal expected_table Test.specify "should load a simple table without headers" <| @@ -24,11 +28,11 @@ spec = c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']] c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']] expected_table = Table.new [c_1, c_2, c_3] - simple_empty = (File_Format.Delimited "," headers=False).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False) simple_empty.should_equal expected_table Test.specify "should work in presence of missing headers" <| - table 
= (File_Format.Delimited "," headers=True).read (Enso_Project.data / "missing_header.csv") Problem_Behavior.Report_Error + table = File.read (Enso_Project.data / "missing_header.csv") (File_Format.Delimited "," headers=True) table.columns.map .name . should_equal ["a", "Column", "c", "Column_1", "d"] table.at "a" . to_vector . should_equal ["1"] table.at "Column" . to_vector . should_equal ["2"] @@ -37,40 +41,40 @@ spec = table.at "d" . to_vector . should_equal ["5"] Test.specify "load even an empty file" <| - table = (File_Format.Delimited "," headers=True).read (Enso_Project.data / "empty.txt") Problem_Behavior.Report_Error + table = File.read (Enso_Project.data / "empty.txt") (File_Format.Delimited "," headers=True) table.columns.map .name . should_equal [] table.row_count . should_equal 0 Test.specify "should correctly handle file opening issues" <| nonexistent_file = Enso_Project.data / "a_filename_that_does_not_exist.foobar" - r1 = (File_Format.Delimited "," headers=True).read nonexistent_file Problem_Behavior.Report_Error + r1 = File.read nonexistent_file (File_Format.Delimited "," headers=True) r1.should_fail_with File.File_Not_Found directory = Enso_Project.data - r2 = (File_Format.Delimited "," headers=True).read directory Problem_Behavior.Report_Error + r2 = File.read directory (File_Format.Delimited "," headers=True) Problem_Behavior.Report_Error r2.should_fail_with File.Io_Error Test.specify "should handle duplicated columns" <| - table = (File_Format.Delimited "," headers=True).read (Enso_Project.data / "duplicated_columns.csv") Problem_Behavior.Report_Error + table = File.read (Enso_Project.data / "duplicated_columns.csv") (File_Format.Delimited "," headers=True) table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1'] table.at 'a' . to_vector . should_equal ['1'] table.at 'a_1' . to_vector . 
should_equal ['4'] Test.specify "should handle quotes" <| - t1 = (File_Format.Delimited "," headers=True).read (Enso_Project.data / "double_quoted.csv") Problem_Behavior.Report_Error + t1 = File.read (Enso_Project.data / "double_quoted.csv") (File_Format.Delimited "," headers=True) t1.at 'a' . to_vector . should_equal ['a, x', '"a'] t1.at 'c' . to_vector . should_equal ['3', '"'] - t2 = (File_Format.Delimited "," headers=True quote_escape="\").read (Enso_Project.data / "escape_quoted.csv") Problem_Behavior.Report_Error + t2 = File.read (Enso_Project.data / "escape_quoted.csv") (File_Format.Delimited "," headers=True quote_escape="\") t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z'] - t3 = (File_Format.Delimited "," quote=Nothing headers=True).read (Enso_Project.data / "no_quoting.csv") Problem_Behavior.Report_Error + t3 = File.read (Enso_Project.data / "no_quoting.csv") (File_Format.Delimited "," quote=Nothing headers=True) t3.at 'a' . to_vector . should_equal ['"y'] t3.at 'b' . to_vector . should_equal ['z"'] t3.at 'c' . to_vector . should_equal ['a'] Test.specify "should support rows spanning multiple lines if quoted" <| - t1 = (File_Format.Delimited "," headers=True).read (Enso_Project.data / "multiline_quoted.csv") Problem_Behavior.Report_Error + t1 = File.read (Enso_Project.data / "multiline_quoted.csv") (File_Format.Delimited "," headers=True) t1.at 'a' . to_vector . should_equal ['1', '4'] t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5'] t1.at 'c' . to_vector . should_equal ['3', '6'] @@ -131,33 +135,33 @@ spec = Problems.test_problem_handling action problems tester Test.specify "should allow to skip rows" <| - t1 = (File_Format.Delimited "," headers=False skip_rows=3).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3) t1.at "Column_1" . to_vector . 
should_equal ['7', '10'] - t2 = (File_Format.Delimited "," headers=True skip_rows=3).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True skip_rows=3) t2.columns.map .name . should_equal ['7', '8', '9'] t2.at "7" . to_vector . should_equal ['10'] Test.specify "should allow to set a limit of rows to read" <| - t1 = (File_Format.Delimited "," headers=False row_limit=2).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=2) t1.at "Column_1" . to_vector . should_equal ['a', '1'] - t2 = (File_Format.Delimited "," headers=True row_limit=2).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=2) t2.at "a" . to_vector . should_equal ['1', '4'] - t3 = (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t3 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1) t3.at "Column_1" . to_vector . should_equal ['7'] - t4 = (File_Format.Delimited "," headers=False row_limit=0).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t4 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=0) t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3'] t4.row_count . should_equal 0 - t5 = (File_Format.Delimited "," headers=True row_limit=0).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t5 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=0) t5.columns.map .name . 
should_equal ['a', 'b', 'c'] t5.at 'a' . to_vector . should_equal [] t5.row_count . should_equal 0 - t6 = (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1000).read (Enso_Project.data / "simple_empty.csv") Problem_Behavior.Report_Error + t6 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1000) t6.at "Column_1" . to_vector . should_equal ['7', '10'] Test.specify "should check arguments" <| From 280f2dc4af2a6b7507f6e1f78964f618195c0f1d Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 4 May 2022 14:36:10 +0100 Subject: [PATCH 18/28] Fix Windows issue --- .../table/src/main/java/org/enso/table/read/DelimitedReader.java | 1 + 1 file changed, 1 insertion(+) diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java index b5ce43e62dfd..6a8f10bff344 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java @@ -148,6 +148,7 @@ private CsvParser setupCsvParser(InputStream inputStream) { settings.setMaxColumns(maxColumns); settings.setSkipEmptyLines(false); settings.setKeepQuotes(true); + settings.setLineSeparatorDetectionEnabled(true); CsvParser parser = new CsvParser(settings); parser.beginParsing(inputStream); return parser; From c75d67c0d0115575bb255ecf307acef1dc9e2432 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 14:39:30 +0100 Subject: [PATCH 19/28] Update distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Radosław Waśko --- distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso 
b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 6ecdeb4a5901..1e36750c3f56 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -82,7 +82,7 @@ type Excel_Range from_address : Text -> Excel_Range from_address address = Panic.catch IllegalArgumentException (Excel_Range (Java_Range.new address)) caught_panic-> - Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage) + Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause) ## Create a Range for a single cell. for_cell : Text -> (Text|Integer) -> Integer -> Excel_Range From 0ad1e2c20be93f16ce1e4b7a71e31756b038762b Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 14:39:55 +0100 Subject: [PATCH 20/28] Update distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Radosław Waśko --- distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 1e36750c3f56..cbe42e7bfc63 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -130,7 +130,7 @@ type Excel_Range ## PRIVATE Wrapper for validation validate : Boolean -> Text -> Any -validate ~validation ~error_message ~wrapped = +validate validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) From dfb86203e30ba6e7e84549f8f19d106dd82ba601 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 14:42:30 +0100 Subject: [PATCH 21/28] Update 
test/Table_Tests/src/Excel_Spec.enso MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Radosław Waśko --- test/Table_Tests/src/Excel_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index f11e6a4d35ef..4fd197a614be 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -116,7 +116,7 @@ spec = check_table table = check_column (table.at "A") col_a check_column (table.at "B") col_b - ## ToDo [JD]: Can't check Dates at present as not being handled correctly. Coming as as Polyglot array + ## ToDo [JD]: Can't check Dates at present as not being handled correctly. Coming as a Polyglot array https://www.pivotaltracker.com/story/show/181755990 # check_column (table.at "C") col_c From 66b0164a87ba97c88b34e8e61c3269e61f61ad22 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 16:08:30 +0100 Subject: [PATCH 22/28] PR comments set 1 --- .../Standard/Table/0.0.0-dev/src/Error.enso | 2 +- .../Table/0.0.0-dev/src/Io/Excel.enso | 41 +++++++----- .../org/enso/table/format/xlsx/Range.java | 40 ++++++++---- .../org/enso/table/format/xlsx/Reader.java | 62 +++++++++++++++++++ 4 files changed, 117 insertions(+), 28 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso index dc7f8b2ea4dc..4b8ecbc571d3 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso @@ -117,4 +117,4 @@ type Invalid_Location (location:Text) Invalid_Location.to_display_text : Text Invalid_Location.to_display_text = - "The location '"+this.location+"' is not valid." \ No newline at end of file + "The location '"+this.location+"' is not valid." 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index cbe42e7bfc63..45f03760f8bf 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -66,10 +66,18 @@ type Excel_Range to_text = "Excel_Range " + this.address ## Validates if a column index (1-based) is within the valid range for Excel. + + Arguments: + - column: 1-based index to check + - limit: maximum valid index, defaults to Excel 2007+ limit of 16,384 is_valid_column : Integer -> Integer -> Boolean is_valid_column column (limit=16384) = (column > 0) && (column <= limit) ## Validates if a row index (1-based) is within the valid range for Excel. + + Arguments: + - row: 1-based index to check + - limit: maximum valid index, defaults to Excel 2007+ limit of 1,048,576 is_valid_row : Integer -> Integer -> Boolean is_valid_row row (limit=1048576) = (row > 0) && (row <= limit) @@ -89,10 +97,11 @@ type Excel_Range for_cell sheet column row = col_index = Excel_Range.column_index column - col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") (col_valid _) + col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") + row_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet col_index row col_index row) + col_valid <| row_valid <| + Excel_Range (Java_Range.new sheet col_index row col_index row) ## Create a Range for a range of cells. 
for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range @@ -100,12 +109,13 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ - right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") (right_valid _) - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") + bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet left_index top right_index bottom) + left_valid <| right_valid <| top_valid <| bottom_valid <| + Excel_Range (Java_Range.new sheet left_index top right_index bottom) ## Create an Excel_Range for a set of columns. 
for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range @@ -113,18 +123,20 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet left_index 0 right_index 0) + left_valid <| right_valid <| + Excel_Range (Java_Range.new sheet left_index 0 right_index 0) ## Create an Excel_Range for a set of rows. for_rows : Text -> Integer -> Integer -> Excel_Range for_rows sheet top (bottom=top) = - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") + bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet 0 top 0 bottom) + top_valid <| bottom_valid <| + Excel_Range (Java_Range.new sheet 0 top 0 bottom) ## PRIVATE @@ -133,6 +145,7 @@ validate : Boolean -> Text -> Any validate validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) + read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) read_excel file section _ xls_format=False = reader 
stream = case section of diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index 2071bba4adb7..e076623b04c4 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -23,6 +23,10 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum return new String[] {matcher.group(1), matcher.group(2)}; } + private static String unescapeSheetName(String sheetName) { + return sheetName.replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); + } + private static final String ADDRESS_A1 = "\\$?[A-Z]{1,3}\\$?\\d+"; private static final String ADDRESS_COL = "\\$?[A-Z]{1,3}"; private static final String ADDRESS_ROW = "\\$?\\d+"; @@ -40,7 +44,7 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum private static int[] parseRange(String range) throws IllegalArgumentException { for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) { Optional parsed = - parseRange(range, pattern, pattern == RANGE_RC ? Range::parseRC : Range::parseA1); + parseRange(range, pattern, pattern == RANGE_RC ? 
Range::parseR1C1StyleAddress : Range::parseA1StyleAddress); if (parsed.isPresent()) { return parsed.get(); @@ -87,13 +91,13 @@ private static int skipDollar(CharSequence address, int index) { return index; } - private static int[] parseA1(CharSequence address) { - ParsedInteger col = parseColumn(address); + private static int[] parseA1StyleAddress(CharSequence address) { + ParsedInteger col = parseColumn(address, skipDollar(address, 0)); ParsedInteger row = parseInteger(address, skipDollar(address, col.index)); return new int[] {row.value, col.value}; } - private static int[] parseRC(CharSequence address) throws IllegalArgumentException { + private static int[] parseR1C1StyleAddress(CharSequence address) throws IllegalArgumentException { int index = 0; int row = 0; @@ -127,7 +131,7 @@ private static int[] parseRC(CharSequence address) throws IllegalArgumentExcepti * @return Column index (A=1 ...) */ public static int parseA1Column(CharSequence column) throws IllegalArgumentException { - ParsedInteger parsed = parseColumn(column); + ParsedInteger parsed = parseColumn(column, skipDollar(column, 0)); if (parsed.index != column.length() || parsed.value == 0) { throw new IllegalArgumentException(column + " is not a valid Excel Column Name."); } @@ -136,7 +140,13 @@ public static int parseA1Column(CharSequence column) throws IllegalArgumentExcep } private static class ParsedInteger { + /** + * Index to the next character after the parsed value + */ public final int index; + /** + * Parsed integer value or 0 if not valid + */ public final int value; public ParsedInteger(int index, int value) { @@ -154,11 +164,10 @@ private static ParsedInteger parseInteger(CharSequence address, int index) { endIndex, endIndex == index ? 
0 : Integer.parseInt(address, index, endIndex, 10)); } - private static ParsedInteger parseColumn(CharSequence column) { + private static ParsedInteger parseColumn(CharSequence column, int startIndex) { int col = 0; - int index = skipDollar(column, 0); - + int index = startIndex; while (index < column.length() && isLetter(column.charAt(index))) { col = 26 * col + (column.charAt(index) - 'A' + 1); index++; @@ -175,7 +184,7 @@ private static ParsedInteger parseColumn(CharSequence column) { public Range(String fullAddress) throws IllegalArgumentException { String[] sheetAndRange = parseFullAddress(fullAddress); - this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); + this.sheetName = unescapeSheetName(sheetAndRange[0]); int[] range = parseRange(sheetAndRange[1]); this.leftColumn = range[1]; @@ -196,6 +205,14 @@ public String getSheetName() { return sheetName; } + public String getEscapedSheetName() { + String sheetNameEscaped = sheetName; + if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) { + sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'"; + } + return sheetNameEscaped; + } + public boolean isWholeRow() { return leftColumn == 0; } @@ -221,10 +238,7 @@ public int getBottomRow() { } public String getAddress() { - String sheetNameEscaped = getSheetName(); - if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) { - sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'"; - } + String sheetNameEscaped = getEscapedSheetName(); String range = (isWholeRow() ? 
"" : CellReference.convertNumToColString(getLeftColumn() - 1)) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 3d73ca1c404d..81c32dec5aa5 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -304,6 +304,14 @@ private static Object getCellValue(Cell cell) { return null; } + /** + * Reads a list of sheet names for the specified XLSX/XLS file into an array. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a String[] containing the sheet names. + * @throws IOException when the input stream cannot be read. + */ public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException { Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetCount = workbook.getNumberOfSheets(); @@ -314,11 +322,30 @@ public static String[] readSheetNames(InputStream stream, boolean xls_format) th return output; } + /** + * Reads a list of range names for the specified XLSX/XLS file into an array. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a String[] containing the range names. + * @throws IOException when the input stream cannot be read. + */ public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException { Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new); } + /** + * Reads a sheet by name for the specified XLSX/XLS file into a table. 
+ * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param sheetName the name of the sheet to read. + * @param skip_rows skip rows from the top the sheet. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readSheetByName( InputStream stream, String sheetName, @@ -341,6 +368,17 @@ public static Table readSheetByName( row_limit == null ? Integer.MAX_VALUE : row_limit); } + /** + * Reads a sheet by index for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param index the 1-based index to the sheet. + * @param skip_rows skip rows from the top the sheet. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readSheetByIndex( InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException, IllegalArgumentException { @@ -360,6 +398,18 @@ public static Table readSheetByIndex( row_limit == null ? Integer.MAX_VALUE : row_limit); } + + /** + * Reads a range by name or address for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param rangeNameOrAddress name or address of the range to read. + * @param skip_rows skip rows from the top of the range. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. 
+ * @throws IOException when the input stream cannot be read. + */ public static Table readRangeByName( InputStream stream, String rangeNameOrAddress, @@ -374,6 +424,18 @@ public static Table readRangeByName( return readRange(workbook, range, skip_rows, row_limit); } + + /** + * Reads a range for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param range the range to read. + * @param skip_rows skip rows from the top of the range. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readRange( InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException { From f38d4712d9425f26fca42e2e68066b924ed3986a Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 16:17:38 +0100 Subject: [PATCH 23/28] PR comments set 2 --- .../lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 45f03760f8bf..67d1e7997201 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -152,13 +152,13 @@ read_excel file section _ xls_format=False = Sheet_Names -> Vector.Vector (Reader.readSheetNames stream xls_format) Range_Names -> Vector.Vector (Reader.readRangeNames stream xls_format) Sheet sheet skip_rows row_limit -> - Table.Table <| - if sheet.is_an Integer then (Reader.readSheetByIndex stream sheet skip_rows row_limit xls_format) else - Reader.readSheetByName stream sheet skip_rows row_limit xls_format + Table.Table <| case sheet of 
+ Integer -> Reader.readSheetByIndex stream sheet skip_rows row_limit xls_format + Text -> Reader.readSheetByName stream sheet skip_rows row_limit xls_format Range address skip_rows row_limit -> - Table.Table <| - if address.is_an Excel_Range then Reader.readRange stream address.java_range skip_rows row_limit xls_format else - Reader.readRangeByName stream address skip_rows row_limit xls_format + Table.Table <| case address of + Excel_Range _ -> Reader.readRange stream address.java_range skip_rows row_limit xls_format + Text -> Reader.readRangeByName stream address skip_rows row_limit xls_format file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause catch_file_failure = Panic.catch IllegalArgumentException handler=file_failure From 74e57056ecea481d3e76154dbf268791c9d33ded Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 17:17:09 +0100 Subject: [PATCH 24/28] PR comments set 3 --- .../Standard/Table/0.0.0-dev/src/Io/Excel.enso | 17 ++++++++++++++--- .../Table/0.0.0-dev/src/Io/File_Format.enso | 13 +++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 67d1e7997201..6c3c8d093eb7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -145,7 +145,18 @@ validate : Boolean -> Text -> Any validate validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) - +## PRIVATE + Reads an input Excel file according to the provided section. + + Arguments: + - file: The File object to read. + - section: The part of the Excel document to read. + - on_problems: Specifies the behavior when a problem occurs during the + operation. By default, a warning is issued, but the operation proceeds. 
+ If set to `Report_Error`, the operation fails with a dataflow error. + If set to `Ignore`, the operation proceeds without errors or warnings. + - xls_format: If `True` then the file is read in using Excel 95-2003 format + otherwise reads in Excel 2007+ format. read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) read_excel file section _ xls_format=False = reader stream = case section of @@ -161,10 +172,10 @@ read_excel file section _ xls_format=False = Text -> Reader.readRangeByName stream address skip_rows row_limit xls_format file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause - catch_file_failure = Panic.catch IllegalArgumentException handler=file_failure + catch_file_failure = Panic.catch IOException handler=file_failure bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) - catch_bad_argument = Panic.catch IOException handler=bad_argument + catch_bad_argument = Panic.catch IllegalArgumentException handler=bad_argument catch_file_failure <| catch_bad_argument <| file.with_input_stream [File.Option.Read] stream->(stream.with_java_stream reader) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index f68da2b16c6e..8c01ee3e7ba5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -120,6 +120,19 @@ type Infer ## Read the file to a `Table` from an Excel file type Excel + ## Read Excel files into a Table or Vector. + + Arguments: + - section: The `Excel_Section` to read from the workbook. + This can be one of: + - `Sheet_Names` - outputs a `Vector` of sheet names. + - `Range_Names` - outputs a `Vector` of range names. + - `Sheet` - outputs a `Table` containing the specified sheet. + - `Range` - outputs a `Table` containing the specified range. 
+ - `xls_format`: + If set to `True`, the file is read as an Excel 95-2003 format. + If set to `False`, the file is read as an Excel 2007+ format. + `Infer` will attempt to deduce this from the extension of the filename. type Excel (section:Excel_Section=Excel_Module.Sheet_Names) (xls_format:(True|False|Infer)=Infer) ## Implements the `File.read` for this `File_Format` From 089d56fb0b1c79babf0e6ad00fa610b914ea92b8 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 6 May 2022 10:02:04 +0100 Subject: [PATCH 25/28] XLS vs XLSX errors --- .../Table/0.0.0-dev/src/Io/Excel.enso | 25 +++++++++---------- test/Table_Tests/src/Excel_Spec.enso | 12 ++++++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 6c3c8d093eb7..554eaf7f9559 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -1,4 +1,4 @@ -from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Error, Any, Error, Panic, File, Vector, False +from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Error, Any, Error, Panic, File, Vector, False, IO from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior import Standard.Table.Data.Table @@ -6,9 +6,10 @@ from Standard.Table.Error as Error_Module import Invalid_Location polyglot java import org.enso.table.format.xlsx.Range as Java_Range polyglot java import org.enso.table.format.xlsx.Reader + polyglot java import java.lang.IllegalArgumentException polyglot java import java.io.IOException - +polyglot java import org.apache.poi.UnsupportedFileFormatException ## Specified the part of an Excel Workbook to Read type Excel_Section @@ -69,17 +70,15 @@ type Excel_Range Arguments: - column: 1-based index to check - - limit: maximum valid index, defaults to Excel 2007+ 
limit of 16,384 - is_valid_column : Integer -> Integer -> Boolean - is_valid_column column (limit=16384) = (column > 0) && (column <= limit) + is_valid_column : Integer -> Boolean + is_valid_column column = (column > 0) && (column <= 16384) ## Validates if a row index (1-based) is within the valid range for Excel. Arguments: - row: 1-based index to check - - limit: maximum valid index, defaults to Excel 2007+ limit of 1,048,576 - is_valid_row : Integer -> Integer -> Boolean - is_valid_row row (limit=1048576) = (row > 0) && (row <= limit) + is_valid_row : Integer -> Boolean + is_valid_row row = (row > 0) && (row <= 1048576) ## Given a column name parse to the index (1-based) or return index unchanged. column_index : (Text|Integer) -> Integer @@ -171,11 +170,11 @@ read_excel file section _ xls_format=False = Excel_Range _ -> Reader.readRange stream address.java_range skip_rows row_limit xls_format Text -> Reader.readRangeByName stream address skip_rows row_limit xls_format - file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause - catch_file_failure = Panic.catch IOException handler=file_failure - bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) - catch_bad_argument = Panic.catch IllegalArgumentException handler=bad_argument + handle_bad_argument = Panic.catch IllegalArgumentException handler=bad_argument + + bad_format caught_panic = Error.throw (File.Io_Error file caught_panic.payload.cause.getMessage) + handle_bad_format = Panic.catch UnsupportedFileFormatException handler=bad_format - catch_file_failure <| catch_bad_argument <| + File.handle_java_exceptions file <| handle_bad_argument <| handle_bad_format <| file.with_input_stream [File.Option.Read] stream->(stream.with_java_stream reader) diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index 4fd197a614be..05c6feb203d6 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ 
b/test/Table_Tests/src/Excel_Spec.enso @@ -1,4 +1,4 @@ -from Standard.Base import Nothing, File, Illegal_Argument_Error +from Standard.Base import Nothing, File, Illegal_Argument_Error, True, False import Standard.Base.Data.Time.Date @@ -198,4 +198,14 @@ spec = table_4 = xlsx_sheet.read (File_Format.Excel (Sheet "Sheet1" row_limit=6)) table_4.length . should_equal 6 + Test.group "Problems" <| + Test.specify "should handle non-existing file gracefully" <| + bad_file = Enso_Project.data / "DoesNotExists.xlsx" + bad_file.read (File_Format.Excel (Range "Sheet1!A:C")) . should_fail_with File.File_Not_Found + + Test.specify "should handle wrong xls_format gracefully" <| + xlsx_sheet.read (File_Format.Excel (Range "Sheet1!A:C") True) . should_fail_with File.Io_Error + xls_sheet.read (File_Format.Excel (Range "Sheet1!A:C") False) . should_fail_with File.Io_Error + + main = Test.Suite.run_main here.spec From a6e7f0c74aa0ea869b6607ab40d841126a1a812b Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 6 May 2022 10:07:31 +0100 Subject: [PATCH 26/28] add "constants" --- .../lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 554eaf7f9559..d5efac118c93 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -71,14 +71,18 @@ type Excel_Range Arguments: - column: 1-based index to check is_valid_column : Integer -> Boolean - is_valid_column column = (column > 0) && (column <= 16384) + is_valid_column column = + excel_2007_column_limit = 16384 + (column > 0) && (column <= excel_2007_column_limit) ## Validates if a row index (1-based) is within the valid range for Excel. 
Arguments: - row: 1-based index to check is_valid_row : Integer -> Boolean - is_valid_row row = (row > 0) && (row <= 1048576) + is_valid_row row = + excel_2007_row_limit = 1048576 + (row > 0) && (row <= excel_2007_row_limit) ## Given a column name parse to the index (1-based) or return index unchanged. column_index : (Text|Integer) -> Integer From 7549773d0c3e95da33571cacd4ccc005eaa0c221 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 6 May 2022 13:20:31 +0100 Subject: [PATCH 27/28] More PR work... --- .../Table/0.0.0-dev/src/Io/Excel.enso | 48 ++++++++++--------- .../org/enso/table/format/xlsx/Range.java | 2 + 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index d5efac118c93..82dbd3af3251 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -11,49 +11,49 @@ polyglot java import java.lang.IllegalArgumentException polyglot java import java.io.IOException polyglot java import org.apache.poi.UnsupportedFileFormatException -## Specified the part of an Excel Workbook to Read type Excel_Section - ## Gets a list of sheets within a workbook + ## Gets a list of sheets within a workbook. type Sheet_Names - ## Gets a list of named ranges within a workbook + ## Gets a list of named ranges within a workbook. type Range_Names - ## Gets the data from a specific sheet. Column names are the Excel column names. + ## Gets the data from a specific sheet. Column names are the Excel column + names. type Sheet (sheet:(Integer|Text)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) - ## Gets a specific range (taking either a defined name or external style address) from the workbook + ## Gets a specific range (taking either a defined name or external style + address) from the workbook. 
type Range (address:(Text|Excel_Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) - -## Specifies a range within Excel and provides various ways of type Excel_Range + ## Specifies a range within an Excel Workbook. type Excel_Range java_range:Java_Range - ## Gets the name of the sheet + ## Gets the name of the sheet. sheet_name : Text sheet_name = this.java_range.getSheetName - ## Gets the index (1-based) of the top row of the range - Return Nothing if referring to a complete column + ## Gets the index (1-based) of the top row of the range. + Returns `Nothing` if referring to a complete column. top_row : Integer | Nothing top_row = if this.java_range.isWholeColumn then Nothing else this.java_range.getTopRow - ## Gets the index (1-based) of the bottom row of the range - Return Nothing if referring to a complete column + ## Gets the index (1-based) of the bottom row of the range. + Returns `Nothing` if referring to a complete column. bottom_row : Integer | Nothing bottom_row = if this.java_range.isWholeColumn then Nothing else this.java_range.getBottomRow - ## Gets the index (1-based) of the left column of the range - Return Nothing if referring to a complete row + ## Gets the index (1-based) of the left column of the range. + Returns `Nothing` if referring to a complete row. left_column : Integer | Nothing left_column = if this.java_range.isWholeRow then Nothing else this.java_range.getLeftColumn - ## Gets the index (1-based) of the right column of the range - Return Nothing if referring to a complete column + ## Gets the index (1-based) of the right column of the range. + Returns `Nothing` if referring to a complete row. right_column : Integer | Nothing right_column = if this.java_range.isWholeRow then Nothing else this.java_range.getRightColumn @@ -62,14 +62,15 @@ type Excel_Range address : Text address = this.java_range.getAddress - ## Display the Excel_Range + ## Displays the Excel_Range. 
to_text : Text to_text = "Excel_Range " + this.address - ## Validates if a column index (1-based) is within the valid range for Excel. + ## Validates if a column index (1-based) is within the valid range for + Excel. Arguments: - - column: 1-based index to check + - column: 1-based index to check. is_valid_column : Integer -> Boolean is_valid_column column = excel_2007_column_limit = 16384 @@ -78,18 +79,19 @@ type Excel_Range ## Validates if a row index (1-based) is within the valid range for Excel. Arguments: - - row: 1-based index to check + - row: 1-based index to check. is_valid_row : Integer -> Boolean is_valid_row row = excel_2007_row_limit = 1048576 (row > 0) && (row <= excel_2007_row_limit) - ## Given a column name parse to the index (1-based) or return index unchanged. + ## Given a column name, parses to the index (1-based) or return index + unchanged. column_index : (Text|Integer) -> Integer column_index column = if column.is_an Integer then column else Java_Range.parseA1Column column - ## Create a Range from an address + ## Creates a Range from an address. from_address : Text -> Excel_Range from_address address = Panic.catch IllegalArgumentException (Excel_Range (Java_Range.new address)) caught_panic-> @@ -143,7 +145,7 @@ type Excel_Range ## PRIVATE - Wrapper for validation + Wrapper for validation of a value prior to execution. 
validate : Boolean -> Text -> Any validate validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index e076623b04c4..b31c2b7f620e 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -77,10 +77,12 @@ private static Optional parseRange( } private static boolean isLetter(char c) { + // Cannot use the isLetter function as must be explicitly A-Z. return c >= 'A' && c <= 'Z'; } private static boolean isDigit(char c) { + // Cannot use the isDigit function as must be explicitly 0-9. return c >= '0' && c <= '9'; } From f9a1e745bb7724b8e4ebd08ff3a4cbe6e446bad1 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 6 May 2022 13:34:56 +0100 Subject: [PATCH 28/28] DRY and changelog --- CHANGELOG.md | 2 ++ .../org/enso/table/format/xlsx/Reader.java | 36 ++++++++++--------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d53cbcb9d53e..5367f042a7a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -115,6 +115,7 @@ `with_step` allowing to change the range step.][3408] - [Aligned `Text.split` API with other methods and added `Text.lines`.][3415] - [Implemented a basic reader for the `Delimited` file format.][3424] +- [Implemented a reader for the `Excel` file format.][3425] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -177,6 +178,7 @@ [3408]: https://github.com/enso-org/enso/pull/3408 [3415]: https://github.com/enso-org/enso/pull/3415 [3424]: https://github.com/enso-org/enso/pull/3424 +[3425]: https://github.com/enso-org/enso/pull/3425 #### Enso Compiler diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java 
b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 81c32dec5aa5..a66010230818 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -205,15 +205,17 @@ private static Table readSheetToTable( // Row Range int firstRow = sheet.getFirstRowNum() + 1; int lastRow = sheet.getLastRowNum() + 1; - int startRow = (range == null || range.isWholeColumn() ? 1 : range.getTopRow()) + skipRows; - int endRow = range == null || range.isWholeColumn() ? lastRow : range.getBottomRow(); + boolean wholeColumn = range == null || range.isWholeColumn(); + int startRow = (wholeColumn ? 1 : range.getTopRow()) + skipRows; + int endRow = wholeColumn ? lastRow : range.getBottomRow(); // Columns - int startCol = (range == null || range.isWholeRow() ? 1 : range.getLeftColumn()); - int endCol = (range == null || range.isWholeRow() ? -1 : range.getRightColumn()); + boolean wholeRow = range == null || range.isWholeRow(); + int startCol = wholeRow ? 1 : range.getLeftColumn(); + int endCol = wholeRow ? -1 : range.getRightColumn(); int size = Math.min(rowCount, endRow - startRow + 1); List builders = - endCol == -1 + wholeRow ? new ArrayList<>() : IntStream.range(startCol, endCol + 1) .mapToObj(i -> new InferredBuilder(size)) @@ -242,7 +244,7 @@ private static Table readSheetToTable( } // Special case for stopping before firstRow - if (endCol == -1 && (rowCount == 0 || row < firstRow)) { + if (wholeRow && (rowCount == 0 || row < firstRow)) { Row currentRow = sheet.getRow(firstRow - 1); int currentEndCol = currentRow.getLastCellNum() + 1; expandBuilders(builders, size, currentEndCol - startCol, size); @@ -313,7 +315,7 @@ private static Object getCellValue(Cell cell) { * @throws IOException when the input stream cannot be read. */ public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException { - Workbook workbook = xls_format ? 
new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + Workbook workbook = getWorkbook(stream, xls_format); int sheetCount = workbook.getNumberOfSheets(); var output = new String[sheetCount]; for (int i = 0; i < sheetCount; i++) { @@ -331,8 +333,9 @@ public static String[] readSheetNames(InputStream stream, boolean xls_format) th * @throws IOException when the input stream cannot be read. */ public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException { - Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); - return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new); + return getWorkbook(stream, xls_format).getAllNames().stream() + .map(Name::getNameName) + .toArray(String[]::new); } /** @@ -353,7 +356,7 @@ public static Table readSheetByName( Integer row_limit, boolean xls_format) throws IOException, IllegalArgumentException { - Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + Workbook workbook = getWorkbook(stream, xls_format); int sheetIndex = getSheetIndex(workbook, sheetName); if (sheetIndex == -1) { @@ -382,7 +385,7 @@ public static Table readSheetByName( public static Table readSheetByIndex( InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException, IllegalArgumentException { - Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + Workbook workbook = getWorkbook(stream, xls_format); int sheetCount = workbook.getNumberOfSheets(); if (index < 1 || index > sheetCount) { @@ -398,7 +401,6 @@ public static Table readSheetByIndex( row_limit == null ? Integer.MAX_VALUE : row_limit); } - /** * Reads a range by name or address for the specified XLSX/XLS file into a table. * @@ -417,14 +419,13 @@ public static Table readRangeByName( Integer row_limit, boolean xls_format) throws IOException { - Workbook workbook = xls_format ? 
new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + Workbook workbook = getWorkbook(stream, xls_format); Name name = workbook.getName(rangeNameOrAddress); Range range = new Range(name == null ? rangeNameOrAddress : name.getRefersToFormula()); return readRange(workbook, range, skip_rows, row_limit); } - /** * Reads a range for the specified XLSX/XLS file into a table. * @@ -439,8 +440,11 @@ public static Table readRangeByName( public static Table readRange( InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException { - Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); - return readRange(workbook, range, skip_rows, row_limit); + return readRange(getWorkbook(stream, xls_format), range, skip_rows, row_limit); + } + + private static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException { + return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); } private static Table readRange(