Skip to content

Commit

Permalink
Java tidy and restructure to allow range names and addresses.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed May 4, 2022
1 parent 3d08a9f commit e8f87a7
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 68 deletions.
15 changes: 9 additions & 6 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ from Standard.Base import Integer, Text, Nothing, Boolean, Illegal_Argument_Erro
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior

import Standard.Table.Data.Table
from Standard.Table.Error as Error_Module import Invalid_Location

polyglot java import org.enso.table.format.xlsx.Range as Java_Range
polyglot java import org.enso.table.format.xlsx.Reader
Expand Down Expand Up @@ -133,7 +134,7 @@ validate ~validation ~error_message ~wrapped =
if validation then wrapped else Error.throw (Illegal_Argument_Error error_message)

read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector)
read_excel file section on_problems xls_format=False =
read_excel file section _ xls_format=False =
reader stream = case section of
Sheet_Names -> Vector.Vector (Reader.readSheetNames stream xls_format)
Range_Names -> Vector.Vector (Reader.readRangeNames stream xls_format)
Expand All @@ -143,10 +144,12 @@ read_excel file section on_problems xls_format=False =
Reader.readSheetByName stream sheet skip_rows row_limit xls_format
Range address skip_rows row_limit ->
Table.Table <|
range = (if address.is_an Excel_Range then address else Excel_Range.from_address address)
Reader.readRange stream range.java_range skip_rows row_limit xls_format
if address.is_an Excel_Range then Reader.readRange stream range.java_range skip_rows row_limit xls_format else
Reader.readRangeByName stream range skip_rows row_limit xls_format

file_failure caught_panic = File.wrap_io_exception file caught_panic.payload.cause.getCause
Panic.catch IOException handler=file_failure <|
file.with_input_stream [File.Option.Read] stream->
stream.with_java_stream reader
# Handler for an IllegalArgumentException raised while reading: rethrows the
# underlying cause wrapped as `Invalid_Location` via `Error.throw`.
bad_argument caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause)

# `handler=` previously had no value, leaving `bad_argument` unused and the
# expression incomplete; the handler is now passed explicitly.
# NOTE(review): confirm IllegalArgumentException has a polyglot java import in this file.
Panic.catch IllegalArgumentException handler=bad_argument <|
    Panic.catch IOException handler=file_failure <|
        file.with_input_stream [File.Option.Read] stream->stream.with_java_stream reader
104 changes: 60 additions & 44 deletions std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
public class Range {
private static final Pattern FULL_ADDRESS = Pattern.compile("^('.+'|[^'!]+)!(.+)$");

private static String[] parseFullAddress(String fullAddress) {
private static String[] parseFullAddress(String fullAddress) throws IllegalArgumentException {
if (fullAddress == null) {
throw new IllegalArgumentException("fullAddress cannot be NULL.");
}
Expand All @@ -37,7 +37,7 @@ private static String[] parseFullAddress(String fullAddress) {
private static final Pattern RANGE_RC =
Pattern.compile("^(" + ADDRESS_RC + ")(?::(" + ADDRESS_RC + "))?$");

private static int[] parseRange(String range) {
private static int[] parseRange(String range) throws IllegalArgumentException {
for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) {
Optional<int[]> parsed =
parseRange(range, pattern, pattern == RANGE_RC ? Range::parseRC : Range::parseA1);
Expand Down Expand Up @@ -72,7 +72,9 @@ private static Optional<int[]> parseRange(
});
}

private static boolean isLetter(char c) { return c >= 'A' && c <= 'Z'; }
private static boolean isLetter(char c) {
return c >= 'A' && c <= 'Z';
}

private static boolean isDigit(char c) {
return c >= '0' && c <= '9';
Expand All @@ -86,70 +88,82 @@ private static int skipDollar(CharSequence address, int index) {
}

private static int[] parseA1(CharSequence address) {
int col = 0;

int index = skipDollar(address, 0);
while (index < address.length() && isLetter(address.charAt(index))) {
col = 26 * col + (address.charAt(index) - 'A' + 1);
index++;
}

index = skipDollar(address, index);
int row = index < address.length() ? Integer.parseInt(address, index, address.length(), 10) : 0;
return new int[] {row, col};
ParsedInteger col = parseColumn(address);
ParsedInteger row = parseInteger(address, skipDollar(address, col.index));
return new int[] {row.value, col.value};
}

private static int[] parseRC(CharSequence address) {
private static int[] parseRC(CharSequence address) throws IllegalArgumentException {
int index = 0;

int row = 0;
if (index < address.length() && address.charAt(index) == 'R') {
// Parse Row
int endIndex = index + 1;
while (endIndex < address.length() && isDigit(address.charAt(endIndex))) {
endIndex++;
ParsedInteger parsed = parseInteger(address, index + 1);
if (parsed.value == 0) {
throw new IllegalArgumentException(address + " not an absolute R1C1 style addresses.");
}

if (endIndex == index + 1) {
throw new IllegalArgumentException("R1C1 style addresses must be absolute.");
}

row = Integer.parseInt(address, index + 1, endIndex, 10);
index = endIndex;
row = parsed.value;
index = parsed.index;
}

int col = 0;
if (index < address.length() && address.charAt(index) == 'C') {
// Parse Row
int endIndex = index + 1;
while (endIndex < address.length() && isDigit(address.charAt(endIndex))) {
endIndex++;
}

if (endIndex == index + 1) {
throw new IllegalArgumentException("R1C1 style addresses must be absolute.");
ParsedInteger parsed = parseInteger(address, index + 1);
if (parsed.value == 0) {
throw new IllegalArgumentException(address + " not an absolute R1C1 style addresses.");
}

col = Integer.parseInt(address, index + 1, endIndex, 10);
col = parsed.value;
}

return new int[] {row, col};
}

public static int parseA1Column(CharSequence column) {
/**
 * Converts an Excel column name (e.g. {@code DCR}) into its 1-based column index.
 *
 * <p>The whole of {@code column} must be consumed by the column parser and the result must be
 * non-zero; otherwise the input is rejected.
 *
 * @param column the column name to convert
 * @return the column index, where {@code A} is 1
 * @throws IllegalArgumentException if {@code column} is not a valid Excel column name
 */
public static int parseA1Column(CharSequence column) throws IllegalArgumentException {
  ParsedInteger result = parseColumn(column);

  boolean consumedEverything = result.index == column.length();
  if (consumedEverything && result.value != 0) {
    return result.value;
  }

  throw new IllegalArgumentException(column + " is not a valid Excel Column Name.");
}

/** Immutable result of a parsing helper: the number that was read and the position reached. */
private static class ParsedInteger {
  /** The numeric value that was parsed. */
  public final int value;

  /** The position in the parsed text at which parsing stopped. */
  public final int index;

  /**
   * @param position the position in the text at which parsing stopped
   * @param number the numeric value that was parsed
   */
  public ParsedInteger(int position, int number) {
    index = position;
    value = number;
  }
}

/**
 * Reads a run of decimal digits from {@code address}, beginning at {@code index}.
 *
 * @param address the text being parsed
 * @param index the position of the first character to examine
 * @return the position just past the last digit consumed, paired with the parsed number; the
 *     value is 0 when there is no digit at {@code index}
 * @throws NumberFormatException if the digit run does not fit in an {@code int}
 */
private static ParsedInteger parseInteger(CharSequence address, int index) {
  int endIndex = index;
  while (endIndex < address.length() && isDigit(address.charAt(endIndex))) {
    endIndex++;
  }

  // Parse from `index` itself (callers already step past any prefix such as 'R', 'C' or '$');
  // starting at `index + 1` dropped the first digit. Callers use a value of 0 to mean
  // "no number present", so an empty digit run yields 0 instead of reaching parseInt.
  int value = endIndex == index ? 0 : Integer.parseInt(address, index, endIndex, 10);
  return new ParsedInteger(endIndex, value);
}

private static ParsedInteger parseColumn(CharSequence column) {
int col = 0;

int index = 0;
int index = skipDollar(column, 0);

while (index < column.length() && isLetter(column.charAt(index))) {
col = 26 * col + (column.charAt(index) - 'A' + 1);
index++;
}

if (index != column.length()) {
return -1;
}

return col;
return new ParsedInteger(index, col);
}

private final String sheetName;
Expand All @@ -158,7 +172,7 @@ public static int parseA1Column(CharSequence column) {
private final int topRow;
private final int bottomRow;

public Range(String fullAddress) {
public Range(String fullAddress) throws IllegalArgumentException {
String[] sheetAndRange = parseFullAddress(fullAddress);
this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'");

Expand Down Expand Up @@ -215,8 +229,10 @@ public String getAddress() {
(isWholeRow() ? "" : CellReference.convertNumToColString(getLeftColumn() - 1))
+ (isWholeColumn() ? "" : Integer.toString(getTopRow()));
if (getLeftColumn() != getRightColumn() || getTopRow() != getBottomRow()) {
range += ":" + (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1))
+ (isWholeColumn() ? "" : Integer.toString(getBottomRow()));
range +=
":"
+ (isWholeRow() ? "" : CellReference.convertNumToColString(getRightColumn() - 1))
+ (isWholeColumn() ? "" : Integer.toString(getBottomRow()));
}

return sheetNameEscaped + "!" + range;
Expand Down
48 changes: 30 additions & 18 deletions std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class Reader {
* @param hasHeaders specifies whether the first non-empty row of the sheet should be used for
* column names.
* @param unnamedColumnPrefix specifies the prefix to use for missing columns.
* @param mkDate a function converting java-based dates into a format understandable by the
* @param mkDate a function converting Java-based dates into a format understandable by the
* caller.
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
Expand Down Expand Up @@ -77,7 +77,7 @@ public static Table read_xlsx(
* @param hasHeaders specifies whether the first non-empty row of the sheet should be used for
* column names.
* @param unnamedColumnPrefix specifies the prefix to use for missing columns.
* @param mkDate a function converting java-based dates into a format understandable by the
* @param mkDate a function converting Java-based dates into a format understandable by the
* caller.
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
Expand Down Expand Up @@ -198,7 +198,10 @@ private static Table read_table(
return new Table(columns);
}

private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, int rowCount) {
private static Table readSheetToTable(
Workbook workbook, int sheetIndex, Range range, int skipRows, int rowCount) {
Sheet sheet = workbook.getSheetAt(sheetIndex);

// Row Range
int firstRow = sheet.getFirstRowNum() + 1;
int lastRow = sheet.getLastRowNum() + 1;
Expand Down Expand Up @@ -255,15 +258,6 @@ private static Table readSheetToTable(Sheet sheet, Range range, int skipRows, in
return new Table(columns);
}

private static String getRefersTo(Workbook workbook, String rangeName) {
for (Name name : workbook.getAllNames()) {
if (name.getNameName().equalsIgnoreCase(rangeName)) {
return name.getRefersToFormula();
}
}
return null;
}

private static int getSheetIndex(Workbook workbook, String sheetName) {
int sheetCount = workbook.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) {
Expand Down Expand Up @@ -328,9 +322,9 @@ public static Table readSheetByName(
throw new IllegalArgumentException("Unknown sheet '" + sheetName + "'.");
}

Sheet sheet = workbook.getSheetAt(sheetIndex);
return readSheetToTable(
sheet,
workbook,
sheetIndex,
null,
skip_rows == null ? 0 : skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
Expand All @@ -347,27 +341,45 @@ public static Table readSheetByIndex(
"Sheet index is not in valid range (1 to " + sheetCount + " inclusive).");
}

Sheet sheet = workbook.getSheetAt(index - 1);
return readSheetToTable(
sheet,
workbook,
index - 1,
null,
skip_rows == null ? 0 : skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
}

/**
 * Reads a table from a workbook, given either a defined name or a range address.
 *
 * <p>The workbook is first asked for a name matching {@code rangeNameOrAddress}. If one exists,
 * the formula it refers to is parsed as the range; otherwise the argument itself is parsed as a
 * range address.
 *
 * @param stream the stream the workbook is loaded from
 * @param rangeNameOrAddress a name defined in the workbook, or a range address
 * @param skip_rows number of rows to skip, passed through to the range reader
 * @param row_limit maximum number of rows to read, passed through to the range reader
 * @param xls_format {@code true} to load the stream as an HSSF workbook, {@code false} for XSSF
 * @return the table read from the resolved range
 * @throws IOException when the workbook cannot be loaded from the stream
 */
public static Table readRangeByName(
    InputStream stream,
    String rangeNameOrAddress,
    Integer skip_rows,
    Integer row_limit,
    boolean xls_format)
    throws IOException {
  final Workbook workbook;
  if (xls_format) {
    workbook = new HSSFWorkbook(stream);
  } else {
    workbook = new XSSFWorkbook(stream);
  }

  // A name defined in the workbook takes precedence over interpreting the text as an address.
  Name definedName = workbook.getName(rangeNameOrAddress);
  String address = definedName == null ? rangeNameOrAddress : definedName.getRefersToFormula();

  return readRange(workbook, new Range(address), skip_rows, row_limit);
}

/**
 * Loads a workbook from {@code stream} and reads the given range from it as a table.
 *
 * @param stream the stream the workbook is loaded from
 * @param range the range to read
 * @param skip_rows number of rows to skip, passed through to the workbook-based overload
 * @param row_limit maximum number of rows to read, passed through to the workbook-based overload
 * @param xls_format {@code true} to load the stream as an HSSF workbook, {@code false} for XSSF
 * @return the table read from the range
 * @throws IOException when the workbook cannot be loaded from the stream
 */
public static Table readRange(
    InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format)
    throws IOException {
  final Workbook workbook;
  if (xls_format) {
    workbook = new HSSFWorkbook(stream);
  } else {
    workbook = new XSSFWorkbook(stream);
  }
  return readRange(workbook, range, skip_rows, row_limit);
}

private static Table readRange(
Workbook workbook, Range range, Integer skip_rows, Integer row_limit) {
int sheetIndex = getSheetIndex(workbook, range.getSheetName());
if (sheetIndex == -1) {
throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'.");
}

Sheet sheet = workbook.getSheetAt(sheetIndex);
return readSheetToTable(
sheet,
workbook,
sheetIndex,
range,
skip_rows == null ? 0 : skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
Expand Down

0 comments on commit e8f87a7

Please sign in to comment.