diff --git a/build.sbt b/build.sbt index 469972d83c93..445516f0a99d 100644 --- a/build.sbt +++ b/build.sbt @@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime")) }.evaluated, Benchmark / parallelExecution := false ) + .dependsOn(`common-polyglot-core-utils`) .dependsOn(`runtime-language-epb`) .dependsOn(`edition-updater`) .dependsOn(`interpreter-dsl`) @@ -2025,22 +2026,37 @@ lazy val `std-base` = project Compile / packageBin / artifactPath := `base-polyglot-root` / "std-base.jar", libraryDependencies ++= Seq( - "com.ibm.icu" % "icu4j" % icuVersion, "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided", "org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided" ), Compile / packageBin := Def.task { val result = (Compile / packageBin).value + val _ensureCoreIsCompiled = + (`common-polyglot-core-utils` / Compile / packageBin).value val _ = StdBits .copyDependencies( `base-polyglot-root`, - Some("std-base.jar"), + Seq("std-base.jar", "common-polyglot-core-utils.jar"), ignoreScalaLibrary = true ) .value result }.value ) + .dependsOn(`common-polyglot-core-utils`) + +lazy val `common-polyglot-core-utils` = project + .in(file("lib/scala/common-polyglot-core-utils")) + .settings( + frgaalJavaCompilerSetting, + autoScalaLibrary := false, + Compile / packageBin / artifactPath := + `base-polyglot-root` / "common-polyglot-core-utils.jar", + libraryDependencies ++= Seq( + "com.ibm.icu" % "icu4j" % icuVersion, + "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided" + ) + ) lazy val `enso-test-java-helpers` = project .in(file("test/Tests/polyglot-sources/enso-test-java-helpers")) @@ -2093,7 +2109,7 @@ lazy val `std-table` = project val _ = StdBits .copyDependencies( `table-polyglot-root`, - Some("std-table.jar"), + Seq("std-table.jar"), ignoreScalaLibrary = true ) .value @@ -2118,7 +2134,7 @@ lazy val `std-image` = project val _ = StdBits .copyDependencies( `image-polyglot-root`, - Some("std-image.jar"), + 
Seq("std-image.jar"), ignoreScalaLibrary = true ) .value @@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project val _ = StdBits .copyDependencies( `google-api-polyglot-root`, - Some("std-google-api.jar"), + Seq("std-google-api.jar"), ignoreScalaLibrary = true ) .value @@ -2172,7 +2188,7 @@ lazy val `std-database` = project val _ = StdBits .copyDependencies( `database-polyglot-root`, - Some("std-database.jar"), + Seq("std-database.jar"), ignoreScalaLibrary = true ) .value diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java index ca2eed129d75..3f2cffb657d5 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java @@ -14,12 +14,12 @@ import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeFormatterBuilder; import java.time.format.DateTimeParseException; import org.enso.interpreter.dsl.Builtin; import org.enso.interpreter.runtime.EnsoContext; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.library.dispatch.TypesLibrary; +import org.enso.polyglot.common_utils.Core_Date_Utils; @ExportLibrary(InteropLibrary.class) @ExportLibrary(TypesLibrary.class) @@ -71,12 +71,7 @@ public static EnsoDateTime now() { @Builtin.WrapException(from = DateTimeParseException.class) @CompilerDirectives.TruffleBoundary public static EnsoDateTime parse(String text) { - String iso = text; - if (text != null && text.length() > 10 && text.charAt(10) == ' ') { - var builder = new StringBuilder(iso); - builder.replace(10, 11, "T"); - iso = builder.toString(); - } + String iso = Core_Date_Utils.normaliseISODateTime(text); var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from); if (datetime instanceof 
ZonedDateTime zdt) { @@ -273,17 +268,5 @@ public Object toDisplayString(boolean allowSideEffects) { private static final EnsoDateTime epochStart = EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC")); - private static final DateTimeFormatter DATE_TIME_FORMATTER = - new DateTimeFormatterBuilder() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .optionalStart() - .parseLenient() - .appendOffsetId() - .optionalEnd() - .optionalStart() - .appendLiteral('[') - .parseCaseSensitive() - .appendZoneRegionId() - .appendLiteral(']') - .toFormatter(); + private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter(); } diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java index 8b24fd190f57..fd920193c5da 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java @@ -1,6 +1,5 @@ package org.enso.interpreter.runtime.data.text; -import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.Normalizer2; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.dsl.Cached; @@ -19,6 +18,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.enso.interpreter.dsl.Builtin; +import org.enso.polyglot.common_utils.Core_Text_Utils; /** The main runtime type for Enso's Text. 
*/ @ExportLibrary(InteropLibrary.class) @@ -178,13 +178,7 @@ String asString(@Cached("build()") @Cached.Shared("strings") ToJavaStringNode to @CompilerDirectives.TruffleBoundary private int computeLength() { - BreakIterator iter = BreakIterator.getCharacterInstance(); - iter.setText(toString()); - int len = 0; - while (iter.next() != BreakIterator.DONE) { - len++; - } - return len; + return Core_Text_Utils.computeGraphemeLength(toString()); } @CompilerDirectives.TruffleBoundary @@ -193,37 +187,7 @@ String toDisplayString( boolean allowSideEffects, @Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) { String str = toJavaStringNode.execute(this); - int len = str.length(); - int outputLength = len + 2; // Precise if there are no special characters. - - // TODO This should be more extensible; while it's still a small fixed set, - // a switch is probably fastest (unconfirmed) - - StringBuffer strBuf = new StringBuffer(outputLength); - - strBuf.append('\''); - - for (int i = 0; i < len; ++i) { - char c = str.charAt(i); - switch (c) { - case '\\' -> strBuf.append("\\\\"); - case '\'' -> strBuf.append("\\'"); - case '\n' -> strBuf.append("\\n"); - case '\t' -> strBuf.append("\\t"); - case '\0' -> strBuf.append("\\0"); - case '\u0007' -> strBuf.append("\\a"); - case '\u0008' -> strBuf.append("\\b"); - case '\u000c' -> strBuf.append("\\f"); - case '\r' -> strBuf.append("\\r"); - case '\u000B' -> strBuf.append("\\v"); - case '\u001B' -> strBuf.append("\\e"); - default -> strBuf.append(c); - } - } - - strBuf.append('\''); - - return strBuf.toString(); + return Core_Text_Utils.prettyPrint(str); } @ExportMessage diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java new file mode 100644 index 000000000000..3cf2ca71b656 --- /dev/null +++ 
b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java @@ -0,0 +1,29 @@ +package org.enso.polyglot.common_utils; + +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; + +public class Core_Date_Utils { + /** + * Replace space with T in ISO date time string to make it compatible with ISO format. + * @param dateString Raw date time string with either space or T as separator + * @return ISO format date time string + */ + public static String normaliseISODateTime(String dateString) { + if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') { + var builder = new StringBuilder(dateString); + builder.replace(10, 11, "T"); + return builder.toString(); + } + + return dateString; + } + + /** @return default Date Time formatter for parsing a Date_Time. */ + public static DateTimeFormatter defaultZonedDateTimeFormatter() { + return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .optionalStart().parseLenient().appendOffsetId().optionalEnd() + .optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']') + .toFormatter(); + } +} diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java new file mode 100644 index 000000000000..f2a692d84f6e --- /dev/null +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java @@ -0,0 +1,51 @@ +package org.enso.polyglot.common_utils; + +import com.ibm.icu.text.BreakIterator; + +public class Core_Text_Utils { + /** Computes the length of the string as the number of grapheme clusters it contains. 
*/ + public static int computeGraphemeLength(String text) { + BreakIterator iter = BreakIterator.getCharacterInstance(); + iter.setText(text); + int len = 0; + while (iter.next() != BreakIterator.DONE) { + len++; + } + return len; + } + + /** Pretty prints the string, escaping special characters. */ + public static String prettyPrint(String str) { + int len = str.length(); + int outputLength = len + 2; // Precise if there are no special characters. + + // TODO This should be more extensible; while it's still a small fixed set, + // a switch is probably fastest (unconfirmed) + + StringBuilder sb = new StringBuilder(outputLength); + + sb.append('\''); + + for (int i = 0; i < len; ++i) { + char c = str.charAt(i); + switch (c) { + case '\\' -> sb.append("\\\\"); + case '\'' -> sb.append("\\'"); + case '\n' -> sb.append("\\n"); + case '\t' -> sb.append("\\t"); + case '\0' -> sb.append("\\0"); + case '\u0007' -> sb.append("\\a"); + case '\u0008' -> sb.append("\\b"); + case '\u000c' -> sb.append("\\f"); + case '\r' -> sb.append("\\r"); + case '\u000B' -> sb.append("\\v"); + case '\u001B' -> sb.append("\\e"); + default -> sb.append(c); + } + } + + sb.append('\''); + + return sb.toString(); + } +} diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java new file mode 100644 index 000000000000..eb2931c17036 --- /dev/null +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java @@ -0,0 +1,20 @@ +/** + * This package contains common utilities which can be used both by the engine runtime and the libraries. + *

+ * <p>This allows us to avoid duplicating code between the runtime and library projects for operations that need to be + * accessible on both sides. + *

+ * <p>The utilities that belong here are mostly operations that are builtins of the Enso language but that also need to be + * used from our Java libraries, where the cost of calling back to Enso methods is relatively high, so accessing the Java + * implementations directly is desirable. The primary example is the algorithm for computing the length of a + * string by counting its grapheme clusters. + *

+ * <p>Due to classpath separation, the class files of this package will be duplicated, with one copy embedded in the engine + * and another shipped as {@code common-polyglot-core-utils.jar} in the {@code polyglot} directory of the Standard.Base + * library. In exchange, this setup allows us to avoid duplicating the source code, so we have a single source of truth + * for each implementation. + *

+ * Due to the copying, the project should not be expanded too much, but all utilities which would end up being + * duplicated are best moved here. + */ +package org.enso.polyglot.common_utils; diff --git a/project/StdBits.scala b/project/StdBits.scala index 51b36f4361ec..06593f15f3cd 100644 --- a/project/StdBits.scala +++ b/project/StdBits.scala @@ -12,16 +12,17 @@ object StdBits { * directory. * * @param destination location where to put the dependencies - * @param baseJarName name of the base generated JAR (if any); unexpected - * (old) files are removed, so this task needs to know - * this file's name to avoid removing it + * @param providedJarNames name of JARs generated by the local projects; + * unexpected (old) files are removed, so this task + * needs to know these files' names to avoid removing + * them * @param ignoreScalaLibrary whether to ignore Scala dependencies that are * added by default be SBT and are not relevant in * pure-Java projects */ def copyDependencies( destination: File, - baseJarName: Option[String], + providedJarNames: Seq[String], ignoreScalaLibrary: Boolean ): Def.Initialize[Task[Unit]] = Def.task { @@ -50,7 +51,7 @@ object StdBits { Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) { report => val expectedFileNames = - report.checked.map(file => file.getName) ++ baseJarName.toSeq + report.checked.map(file => file.getName) ++ providedJarNames for (existing <- IO.listFiles(destination)) { if (!expectedFileNames.contains(existing.getName)) { log.info( diff --git a/std-bits/base/src/main/java/org/enso/base/Text_Utils.java b/std-bits/base/src/main/java/org/enso/base/Text_Utils.java index 923fbb507ada..2da16c249bab 100644 --- a/std-bits/base/src/main/java/org/enso/base/Text_Utils.java +++ b/std-bits/base/src/main/java/org/enso/base/Text_Utils.java @@ -13,6 +13,7 @@ import org.enso.base.text.CaseFoldedString.Grapheme; import org.enso.base.text.GraphemeSpan; import org.enso.base.text.Utf16Span; +import 
org.enso.polyglot.common_utils.Core_Text_Utils; /** Utils for standard library operations on Text. */ public class Text_Utils { @@ -268,14 +269,8 @@ public static long char_length(String str) { * @param str the string to measure * @return length of the string */ - private static long grapheme_length(String str) { - BreakIterator iter = BreakIterator.getCharacterInstance(); - iter.setText(str); - long len = 0; - while (iter.next() != BreakIterator.DONE) { - len++; - } - return len; + public static long grapheme_length(String str) { + return Core_Text_Utils.computeGraphemeLength(str); } /** Returns a prefix of the string not exceeding the provided grapheme length. */ @@ -566,4 +561,9 @@ public static String replace_spans(String str, List spans, String new sb.append(str, current_ix, str.length()); return sb.toString(); } + + /** Pretty prints the string, escaping special characters. */ + public static String pretty_print(String str) { + return Core_Text_Utils.prettyPrint(str); + } } diff --git a/std-bits/base/src/main/java/org/enso/base/Time_Utils.java b/std-bits/base/src/main/java/org/enso/base/Time_Utils.java index 49729a81fd43..37fbb1dec25f 100644 --- a/std-bits/base/src/main/java/org/enso/base/Time_Utils.java +++ b/std-bits/base/src/main/java/org/enso/base/Time_Utils.java @@ -4,6 +4,7 @@ import org.enso.base.time.Date_Utils; import org.enso.base.time.TimeUtilsBase; import org.enso.base.time.Time_Of_Day_Utils; +import org.enso.polyglot.common_utils.Core_Date_Utils; import org.graalvm.polyglot.Value; import java.time.DateTimeException; @@ -74,10 +75,7 @@ public static boolean is_iso_datetime_based(String format) { /** @return default Date Time formatter for parsing a Date_Time. 
*/ public static DateTimeFormatter default_zoned_date_time_formatter() { - return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .optionalStart().parseLenient().appendOffsetId().optionalEnd() - .optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']') - .toFormatter(); + return Core_Date_Utils.defaultZonedDateTimeFormatter(); } /** @return default Date Time formatter for writing a Date_Time. */ @@ -94,13 +92,7 @@ public static DateTimeFormatter default_output_date_time_formatter() { * @return ISO format date time string */ public static String normaliseISODateTime(String dateString) { - if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') { - var builder = new StringBuilder(dateString); - builder.replace(10, 11, "T"); - return builder.toString(); - } - - return dateString; + return Core_Date_Utils.normaliseISODateTime(dateString); } public static String local_date_format(LocalDate date, Object format) { diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java index 324400334e80..b1a03e0cb67f 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java @@ -1,6 +1,7 @@ package org.enso.table.aggregations; import com.ibm.icu.text.BreakIterator; +import org.enso.base.Text_Utils; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; @@ -26,12 +27,12 @@ public Object aggregate(List indexes) { for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value != null) { - if (!(value instanceof String)) { + if (!(value instanceof String asString)) { this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value.")); 
return null; } - long valueLength = GraphemeLength((String) value); + long valueLength = Text_Utils.grapheme_length(asString); if (current == null || Long.compare(valueLength, length) == minOrMax) { length = valueLength; current = value; @@ -41,16 +42,4 @@ public Object aggregate(List indexes) { return current; } - - private static long GraphemeLength(String text) { - BreakIterator iter = BreakIterator.getCharacterInstance(); - iter.setText(text); - - int count = 0; - for (int end = iter.next(); end != BreakIterator.DONE; end = iter.next()) { - count++; - } - - return count; - } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index 04e68dbc9b7e..bb5a88474169 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -1,5 +1,6 @@ package org.enso.table.data.table; +import org.enso.base.Text_Utils; import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.storage.BoolStorage; @@ -40,8 +41,8 @@ public static void ensureNameIsValid(String name) { throw new IllegalArgumentException("Column name cannot be empty."); } if (name.indexOf('\0') >= 0) { - // TODO pretty? - throw new IllegalArgumentException("Column name "+name+" must not contain the NUL character."); + String pretty = Text_Utils.pretty_print(name); + throw new IllegalArgumentException("Column name "+pretty+" must not contain the NUL character."); } }