diff --git a/build.sbt b/build.sbt index 469972d83c93..445516f0a99d 100644 --- a/build.sbt +++ b/build.sbt @@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime")) }.evaluated, Benchmark / parallelExecution := false ) + .dependsOn(`common-polyglot-core-utils`) .dependsOn(`runtime-language-epb`) .dependsOn(`edition-updater`) .dependsOn(`interpreter-dsl`) @@ -2025,22 +2026,37 @@ lazy val `std-base` = project Compile / packageBin / artifactPath := `base-polyglot-root` / "std-base.jar", libraryDependencies ++= Seq( - "com.ibm.icu" % "icu4j" % icuVersion, "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided", "org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided" ), Compile / packageBin := Def.task { val result = (Compile / packageBin).value + val _ensureCoreIsCompiled = + (`common-polyglot-core-utils` / Compile / packageBin).value val _ = StdBits .copyDependencies( `base-polyglot-root`, - Some("std-base.jar"), + Seq("std-base.jar", "common-polyglot-core-utils.jar"), ignoreScalaLibrary = true ) .value result }.value ) + .dependsOn(`common-polyglot-core-utils`) + +lazy val `common-polyglot-core-utils` = project + .in(file("lib/scala/common-polyglot-core-utils")) + .settings( + frgaalJavaCompilerSetting, + autoScalaLibrary := false, + Compile / packageBin / artifactPath := + `base-polyglot-root` / "common-polyglot-core-utils.jar", + libraryDependencies ++= Seq( + "com.ibm.icu" % "icu4j" % icuVersion, + "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided" + ) + ) lazy val `enso-test-java-helpers` = project .in(file("test/Tests/polyglot-sources/enso-test-java-helpers")) @@ -2093,7 +2109,7 @@ lazy val `std-table` = project val _ = StdBits .copyDependencies( `table-polyglot-root`, - Some("std-table.jar"), + Seq("std-table.jar"), ignoreScalaLibrary = true ) .value @@ -2118,7 +2134,7 @@ lazy val `std-image` = project val _ = StdBits .copyDependencies( `image-polyglot-root`, - Some("std-image.jar"), + 
Seq("std-image.jar"), ignoreScalaLibrary = true ) .value @@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project val _ = StdBits .copyDependencies( `google-api-polyglot-root`, - Some("std-google-api.jar"), + Seq("std-google-api.jar"), ignoreScalaLibrary = true ) .value @@ -2172,7 +2188,7 @@ lazy val `std-database` = project val _ = StdBits .copyDependencies( `database-polyglot-root`, - Some("std-database.jar"), + Seq("std-database.jar"), ignoreScalaLibrary = true ) .value diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java index ca2eed129d75..3f2cffb657d5 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDateTime.java @@ -14,12 +14,12 @@ import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeFormatterBuilder; import java.time.format.DateTimeParseException; import org.enso.interpreter.dsl.Builtin; import org.enso.interpreter.runtime.EnsoContext; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.library.dispatch.TypesLibrary; +import org.enso.polyglot.common_utils.Core_Date_Utils; @ExportLibrary(InteropLibrary.class) @ExportLibrary(TypesLibrary.class) @@ -71,12 +71,7 @@ public static EnsoDateTime now() { @Builtin.WrapException(from = DateTimeParseException.class) @CompilerDirectives.TruffleBoundary public static EnsoDateTime parse(String text) { - String iso = text; - if (text != null && text.length() > 10 && text.charAt(10) == ' ') { - var builder = new StringBuilder(iso); - builder.replace(10, 11, "T"); - iso = builder.toString(); - } + String iso = Core_Date_Utils.normaliseISODateTime(text); var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from); if (datetime instanceof 
ZonedDateTime zdt) { @@ -273,17 +268,5 @@ public Object toDisplayString(boolean allowSideEffects) { private static final EnsoDateTime epochStart = EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC")); - private static final DateTimeFormatter DATE_TIME_FORMATTER = - new DateTimeFormatterBuilder() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .optionalStart() - .parseLenient() - .appendOffsetId() - .optionalEnd() - .optionalStart() - .appendLiteral('[') - .parseCaseSensitive() - .appendZoneRegionId() - .appendLiteral(']') - .toFormatter(); + private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter(); } diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java index 8b24fd190f57..fd920193c5da 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/text/Text.java @@ -1,6 +1,5 @@ package org.enso.interpreter.runtime.data.text; -import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.Normalizer2; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.dsl.Cached; @@ -19,6 +18,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.enso.interpreter.dsl.Builtin; +import org.enso.polyglot.common_utils.Core_Text_Utils; /** The main runtime type for Enso's Text. 
*/ @ExportLibrary(InteropLibrary.class) @@ -178,13 +178,7 @@ String asString(@Cached("build()") @Cached.Shared("strings") ToJavaStringNode to @CompilerDirectives.TruffleBoundary private int computeLength() { - BreakIterator iter = BreakIterator.getCharacterInstance(); - iter.setText(toString()); - int len = 0; - while (iter.next() != BreakIterator.DONE) { - len++; - } - return len; + return Core_Text_Utils.computeGraphemeLength(toString()); } @CompilerDirectives.TruffleBoundary @@ -193,37 +187,7 @@ String toDisplayString( boolean allowSideEffects, @Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) { String str = toJavaStringNode.execute(this); - int len = str.length(); - int outputLength = len + 2; // Precise if there are no special characters. - - // TODO This should be more extensible; while it's still a small fixed set, - // a switch is probably fastest (unconfirmed) - - StringBuffer strBuf = new StringBuffer(outputLength); - - strBuf.append('\''); - - for (int i = 0; i < len; ++i) { - char c = str.charAt(i); - switch (c) { - case '\\' -> strBuf.append("\\\\"); - case '\'' -> strBuf.append("\\'"); - case '\n' -> strBuf.append("\\n"); - case '\t' -> strBuf.append("\\t"); - case '\0' -> strBuf.append("\\0"); - case '\u0007' -> strBuf.append("\\a"); - case '\u0008' -> strBuf.append("\\b"); - case '\u000c' -> strBuf.append("\\f"); - case '\r' -> strBuf.append("\\r"); - case '\u000B' -> strBuf.append("\\v"); - case '\u001B' -> strBuf.append("\\e"); - default -> strBuf.append(c); - } - } - - strBuf.append('\''); - - return strBuf.toString(); + return Core_Text_Utils.prettyPrint(str); } @ExportMessage diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java new file mode 100644 index 000000000000..3cf2ca71b656 --- /dev/null +++ 
b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java @@ -0,0 +1,29 @@ +package org.enso.polyglot.common_utils; + +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; + +public class Core_Date_Utils { + /** + * Replace space with T in ISO date time string to make it compatible with ISO format. + * @param dateString Raw date time string with either space or T as separator + * @return ISO format date time string + */ + public static String normaliseISODateTime(String dateString) { + if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') { + var builder = new StringBuilder(dateString); + builder.replace(10, 11, "T"); + return builder.toString(); + } + + return dateString; + } + + /** @return default Date Time formatter for parsing a Date_Time. */ + public static DateTimeFormatter defaultZonedDateTimeFormatter() { + return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .optionalStart().parseLenient().appendOffsetId().optionalEnd() + .optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']') + .toFormatter(); + } +} diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java new file mode 100644 index 000000000000..f2a692d84f6e --- /dev/null +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java @@ -0,0 +1,51 @@ +package org.enso.polyglot.common_utils; + +import com.ibm.icu.text.BreakIterator; + +public class Core_Text_Utils { + /** Computes the length of the string as the number of grapheme clusters it contains. 
*/ + public static int computeGraphemeLength(String text) { + BreakIterator iter = BreakIterator.getCharacterInstance(); + iter.setText(text); + int len = 0; + while (iter.next() != BreakIterator.DONE) { + len++; + } + return len; + } + + /** Pretty prints the string, escaping special characters. */ + public static String prettyPrint(String str) { + int len = str.length(); + int outputLength = len + 2; // Precise if there are no special characters. + + // TODO This should be more extensible; while it's still a small fixed set, + // a switch is probably fastest (unconfirmed) + + StringBuilder sb = new StringBuilder(outputLength); + + sb.append('\''); + + for (int i = 0; i < len; ++i) { + char c = str.charAt(i); + switch (c) { + case '\\' -> sb.append("\\\\"); + case '\'' -> sb.append("\\'"); + case '\n' -> sb.append("\\n"); + case '\t' -> sb.append("\\t"); + case '\0' -> sb.append("\\0"); + case '\u0007' -> sb.append("\\a"); + case '\u0008' -> sb.append("\\b"); + case '\u000c' -> sb.append("\\f"); + case '\r' -> sb.append("\\r"); + case '\u000B' -> sb.append("\\v"); + case '\u001B' -> sb.append("\\e"); + default -> sb.append(c); + } + } + + sb.append('\''); + + return sb.toString(); + } +} diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java new file mode 100644 index 000000000000..eb2931c17036 --- /dev/null +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/package-info.java @@ -0,0 +1,20 @@ +/** + * This package contains common utilities which can be used both by the engine runtime and the libraries. + *
+ * This allows us to avoid duplicating code between the runtime and library projects for operations that need to be + * accessible on both sides. + *
+ * The utilities that belong here are mostly operations that are builtins of the Enso language but also need to be used + * from our Java libraries where the cost of calling back to Enso methods is relatively high, so accessing the Java + * implementations directly is desirable. The primary example of that is the algorithm for computing the length of a + * string by counting the grapheme clusters. + *
+ * Due to classpath separation, the class files of this package will be duplicated with one copy embedded in the engine + * and another attached as `common-polyglot-core-utils.jar` placed in the `polyglot` directory of the Standard.Base + * library. But it allows us to avoid duplicating the code, so we can have a single source of truth for each + * implementation. + *
+ * Because every class in this package is shipped twice, the project should be kept small; nevertheless, any utility
+ * that would otherwise have to be implemented on both sides is best moved here.
+ */
+package org.enso.polyglot.common_utils;
diff --git a/project/StdBits.scala b/project/StdBits.scala
index 51b36f4361ec..06593f15f3cd 100644
--- a/project/StdBits.scala
+++ b/project/StdBits.scala
@@ -12,16 +12,17 @@ object StdBits {
* directory.
*
* @param destination location where to put the dependencies
- * @param baseJarName name of the base generated JAR (if any); unexpected
- * (old) files are removed, so this task needs to know
- * this file's name to avoid removing it
+ * @param providedJarNames names of the JARs generated by local projects;
+ * unexpected (old) files are removed, so this task
+ * needs to know these files' names to avoid removing
+ * them
* @param ignoreScalaLibrary whether to ignore Scala dependencies that are
- * added by default be SBT and are not relevant in
+ * added by default by SBT and are not relevant in
* pure-Java projects
*/
def copyDependencies(
destination: File,
- baseJarName: Option[String],
+ providedJarNames: Seq[String],
ignoreScalaLibrary: Boolean
): Def.Initialize[Task[Unit]] =
Def.task {
@@ -50,7 +51,7 @@ object StdBits {
Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) {
report =>
val expectedFileNames =
- report.checked.map(file => file.getName) ++ baseJarName.toSeq
+ report.checked.map(file => file.getName) ++ providedJarNames
for (existing <- IO.listFiles(destination)) {
if (!expectedFileNames.contains(existing.getName)) {
log.info(
diff --git a/std-bits/base/src/main/java/org/enso/base/Text_Utils.java b/std-bits/base/src/main/java/org/enso/base/Text_Utils.java
index 923fbb507ada..2da16c249bab 100644
--- a/std-bits/base/src/main/java/org/enso/base/Text_Utils.java
+++ b/std-bits/base/src/main/java/org/enso/base/Text_Utils.java
@@ -13,6 +13,7 @@
import org.enso.base.text.CaseFoldedString.Grapheme;
import org.enso.base.text.GraphemeSpan;
import org.enso.base.text.Utf16Span;
+import org.enso.polyglot.common_utils.Core_Text_Utils;
/** Utils for standard library operations on Text. */
public class Text_Utils {
@@ -268,14 +269,8 @@ public static long char_length(String str) {
* @param str the string to measure
* @return length of the string
*/
- private static long grapheme_length(String str) {
- BreakIterator iter = BreakIterator.getCharacterInstance();
- iter.setText(str);
- long len = 0;
- while (iter.next() != BreakIterator.DONE) {
- len++;
- }
- return len;
+ public static long grapheme_length(String str) {
+ return Core_Text_Utils.computeGraphemeLength(str);
}
/** Returns a prefix of the string not exceeding the provided grapheme length. */
@@ -566,4 +561,9 @@ public static String replace_spans(String str, List