Skip to content

Commit

Permalink
Add a common-polyglot-core-utils project (#5855)
Browse files Browse the repository at this point in the history
Adds a common project that allows sharing code between the `runtime` and `std-bits`.

Due to classpath separation and the way it is compiled, the classes will be duplicated - we will have one copy for the `runtime` classpath and another copy as a small JAR for `Standard.Base` library.

This is still much better than having the code duplicated - now at least we have a single source of truth for the shared implementations.

Because of this copying, we should not expand this project too much, but I encourage putting here any methods that would otherwise require us to copy the code itself.

This may be a good place to put parts of the hashing logic to then allow sharing the logic between the `runtime` and the `MultiValueKey` in the `Table` library (cc: @Akirathan).
  • Loading branch information
radeusgd authored Mar 11, 2023
1 parent 7258414 commit 263c3ad
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 105 deletions.
28 changes: 22 additions & 6 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime"))
}.evaluated,
Benchmark / parallelExecution := false
)
.dependsOn(`common-polyglot-core-utils`)
.dependsOn(`runtime-language-epb`)
.dependsOn(`edition-updater`)
.dependsOn(`interpreter-dsl`)
Expand Down Expand Up @@ -2025,22 +2026,37 @@ lazy val `std-base` = project
Compile / packageBin / artifactPath :=
`base-polyglot-root` / "std-base.jar",
libraryDependencies ++= Seq(
"com.ibm.icu" % "icu4j" % icuVersion,
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
"org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided"
),
Compile / packageBin := Def.task {
val result = (Compile / packageBin).value
val _ensureCoreIsCompiled =
(`common-polyglot-core-utils` / Compile / packageBin).value
val _ = StdBits
.copyDependencies(
`base-polyglot-root`,
Some("std-base.jar"),
Seq("std-base.jar", "common-polyglot-core-utils.jar"),
ignoreScalaLibrary = true
)
.value
result
}.value
)
.dependsOn(`common-polyglot-core-utils`)

// Java utilities shared between the engine `runtime` and the `std-base`
// library project; both declare `.dependsOn(`common-polyglot-core-utils`)`.
lazy val `common-polyglot-core-utils` =
  (project in file("lib/scala/common-polyglot-core-utils"))
    .settings(
      frgaalJavaCompilerSetting,
      // Do not add the Scala standard library as a dependency of this project.
      autoScalaLibrary := false,
      // The packaged JAR is written next to `std-base.jar`, under the name that
      // `std-base` lists as a provided JAR when copying its dependencies.
      Compile / packageBin / artifactPath :=
        `base-polyglot-root` / "common-polyglot-core-utils.jar",
      libraryDependencies += "com.ibm.icu" % "icu4j" % icuVersion,
      libraryDependencies +=
        "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided"
    )

lazy val `enso-test-java-helpers` = project
.in(file("test/Tests/polyglot-sources/enso-test-java-helpers"))
Expand Down Expand Up @@ -2093,7 +2109,7 @@ lazy val `std-table` = project
val _ = StdBits
.copyDependencies(
`table-polyglot-root`,
Some("std-table.jar"),
Seq("std-table.jar"),
ignoreScalaLibrary = true
)
.value
Expand All @@ -2118,7 +2134,7 @@ lazy val `std-image` = project
val _ = StdBits
.copyDependencies(
`image-polyglot-root`,
Some("std-image.jar"),
Seq("std-image.jar"),
ignoreScalaLibrary = true
)
.value
Expand All @@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project
val _ = StdBits
.copyDependencies(
`google-api-polyglot-root`,
Some("std-google-api.jar"),
Seq("std-google-api.jar"),
ignoreScalaLibrary = true
)
.value
Expand Down Expand Up @@ -2172,7 +2188,7 @@ lazy val `std-database` = project
val _ = StdBits
.copyDependencies(
`database-polyglot-root`,
Some("std-database.jar"),
Seq("std-database.jar"),
ignoreScalaLibrary = true
)
.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.polyglot.common_utils.Core_Date_Utils;

@ExportLibrary(InteropLibrary.class)
@ExportLibrary(TypesLibrary.class)
Expand Down Expand Up @@ -71,12 +71,7 @@ public static EnsoDateTime now() {
@Builtin.WrapException(from = DateTimeParseException.class)
@CompilerDirectives.TruffleBoundary
public static EnsoDateTime parse(String text) {
String iso = text;
if (text != null && text.length() > 10 && text.charAt(10) == ' ') {
var builder = new StringBuilder(iso);
builder.replace(10, 11, "T");
iso = builder.toString();
}
String iso = Core_Date_Utils.normaliseISODateTime(text);

var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from);
if (datetime instanceof ZonedDateTime zdt) {
Expand Down Expand Up @@ -273,17 +268,5 @@ public Object toDisplayString(boolean allowSideEffects) {
private static final EnsoDateTime epochStart =
EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC"));

private static final DateTimeFormatter DATE_TIME_FORMATTER =
new DateTimeFormatterBuilder()
.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.optionalStart()
.parseLenient()
.appendOffsetId()
.optionalEnd()
.optionalStart()
.appendLiteral('[')
.parseCaseSensitive()
.appendZoneRegionId()
.appendLiteral(']')
.toFormatter();
private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter();
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.enso.interpreter.runtime.data.text;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Normalizer2;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.dsl.Cached;
Expand All @@ -19,6 +18,7 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.enso.interpreter.dsl.Builtin;
import org.enso.polyglot.common_utils.Core_Text_Utils;

/** The main runtime type for Enso's Text. */
@ExportLibrary(InteropLibrary.class)
Expand Down Expand Up @@ -178,13 +178,7 @@ String asString(@Cached("build()") @Cached.Shared("strings") ToJavaStringNode to

@CompilerDirectives.TruffleBoundary
private int computeLength() {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(toString());
int len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
return Core_Text_Utils.computeGraphemeLength(toString());
}

@CompilerDirectives.TruffleBoundary
Expand All @@ -193,37 +187,7 @@ String toDisplayString(
boolean allowSideEffects,
@Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) {
String str = toJavaStringNode.execute(this);
int len = str.length();
int outputLength = len + 2; // Precise if there are no special characters.

// TODO This should be more extensible; while it's still a small fixed set,
// a switch is probably fastest (unconfirmed)

StringBuffer strBuf = new StringBuffer(outputLength);

strBuf.append('\'');

for (int i = 0; i < len; ++i) {
char c = str.charAt(i);
switch (c) {
case '\\' -> strBuf.append("\\\\");
case '\'' -> strBuf.append("\\'");
case '\n' -> strBuf.append("\\n");
case '\t' -> strBuf.append("\\t");
case '\0' -> strBuf.append("\\0");
case '\u0007' -> strBuf.append("\\a");
case '\u0008' -> strBuf.append("\\b");
case '\u000c' -> strBuf.append("\\f");
case '\r' -> strBuf.append("\\r");
case '\u000B' -> strBuf.append("\\v");
case '\u001B' -> strBuf.append("\\e");
default -> strBuf.append(c);
}
}

strBuf.append('\'');

return strBuf.toString();
return Core_Text_Utils.prettyPrint(str);
}

@ExportMessage
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.enso.polyglot.common_utils;

import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;

public class Core_Date_Utils {
  /** Position of the character separating the date part from the time part. */
  private static final int SEPARATOR_INDEX = 10;

  /**
   * Normalises a date time string so that its date and time parts are separated by {@code T}.
   *
   * <p>Only a space found exactly at index 10 is replaced; {@code null}, strings of 10 characters
   * or fewer, and strings with any other character at that position are returned unchanged.
   *
   * @param dateString raw date time string using either a space or {@code T} as the separator
   * @return the string with the separating space replaced by {@code T}, or the input itself if no
   *     replacement applies
   */
  public static String normaliseISODateTime(String dateString) {
    boolean hasSpaceSeparator =
        dateString != null
            && dateString.length() > SEPARATOR_INDEX
            && dateString.charAt(SEPARATOR_INDEX) == ' ';
    if (!hasSpaceSeparator) {
      return dateString;
    }

    String datePart = dateString.substring(0, SEPARATOR_INDEX);
    String timePart = dateString.substring(SEPARATOR_INDEX + 1);
    return datePart + 'T' + timePart;
  }

  /**
   * Builds the default formatter used for parsing a Date_Time.
   *
   * <p>The format is an ISO local date time, optionally followed by an offset id, optionally
   * followed by a zone region id enclosed in square brackets.
   *
   * @return a freshly built formatter; a new instance is created on every call
   */
  public static DateTimeFormatter defaultZonedDateTimeFormatter() {
    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
    builder.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME);

    // Optional offset section; lenient parsing is switched on before the offset.
    builder.optionalStart();
    builder.parseLenient();
    builder.appendOffsetId();
    builder.optionalEnd();

    // Optional "[region]" section; case-sensitive parsing is switched on before the region id.
    // The section is left open on purpose: toFormatter() ends any open optional sections.
    builder.optionalStart();
    builder.appendLiteral('[');
    builder.parseCaseSensitive();
    builder.appendZoneRegionId();
    builder.appendLiteral(']');

    return builder.toFormatter();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.enso.polyglot.common_utils;

import com.ibm.icu.text.BreakIterator;

public class Core_Text_Utils {
  /**
   * Computes the length of the string as the number of grapheme clusters it contains.
   *
   * @param text the string to measure
   * @return the number of boundaries reported by a character-level {@link BreakIterator} before it
   *     is exhausted
   */
  public static int computeGraphemeLength(String text) {
    BreakIterator boundaries = BreakIterator.getCharacterInstance();
    boundaries.setText(text);

    int count = 0;
    // Every boundary reported after the start of the text closes one grapheme cluster.
    for (int pos = boundaries.next(); pos != BreakIterator.DONE; pos = boundaries.next()) {
      count++;
    }
    return count;
  }

  /**
   * Pretty prints the string, escaping special characters.
   *
   * <p>The result is wrapped in single quotes. Backslash, single quote and a fixed set of control
   * characters are replaced by their backslash escape sequences; every other character is copied
   * as is.
   *
   * @param str the string to render
   * @return the quoted and escaped representation of {@code str}
   */
  public static String prettyPrint(String str) {
    // The initial capacity is exact when nothing needs escaping: the text plus two quotes.
    StringBuilder out = new StringBuilder(str.length() + 2);
    out.append('\'');

    for (char ch : str.toCharArray()) {
      String escaped = escapeSequenceFor(ch);
      if (escaped == null) {
        out.append(ch);
      } else {
        out.append(escaped);
      }
    }

    out.append('\'');
    return out.toString();
  }

  /**
   * Looks up the escape sequence for a single character.
   *
   * @return the escape sequence, or {@code null} if the character is emitted unchanged
   */
  private static String escapeSequenceFor(char ch) {
    // TODO This should be more extensible; while it's still a small fixed set,
    //  a switch is probably fastest (unconfirmed)
    return switch (ch) {
      case '\\' -> "\\\\";
      case '\'' -> "\\'";
      case '\n' -> "\\n";
      case '\t' -> "\\t";
      case '\0' -> "\\0";
      case '\u0007' -> "\\a";
      case '\u0008' -> "\\b";
      case '\u000c' -> "\\f";
      case '\r' -> "\\r";
      case '\u000B' -> "\\v";
      case '\u001B' -> "\\e";
      default -> null;
    };
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* This package contains common utilities which can be used both by the engine runtime and the libraries.
* <p>
* This allows us to avoid duplicating code between the runtime and library projects for operations that need to be
* accessible on both sides.
* <p>
* The utilities that belong here are mostly operations that are builtins of the Enso language but also need to be used
* from our Java libraries where the cost of calling back to Enso methods is relatively high, so accessing the Java
* implementations directly is desirable. The primary example of that is the algorithm for computing the length of a
* string by counting the grapheme clusters.
* <p>
* Due to classpath separation, the class files of this package will be duplicated with one copy embedded in the engine
* and another attached as {@code common-polyglot-core-utils.jar} placed in the {@code polyglot} directory of the
* Standard.Base library. Still, this lets us avoid duplicating the source code, so we can have a single source of truth
* for each implementation.
* <p>
* Due to the copying, the project should not be expanded too much, but all utilities which would end up being
* duplicated are best moved here.
*/
package org.enso.polyglot.common_utils;
11 changes: 6 additions & 5 deletions project/StdBits.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@ object StdBits {
* directory.
*
* @param destination location where to put the dependencies
* @param baseJarName name of the base generated JAR (if any); unexpected
* (old) files are removed, so this task needs to know
* this file's name to avoid removing it
* @param providedJarNames names of JARs generated by the local projects;
* unexpected (old) files are removed, so this task
* needs to know these files' names to avoid removing
* them
* @param ignoreScalaLibrary whether to ignore Scala dependencies that are
* added by default by SBT and are not relevant in
* pure-Java projects
*/
def copyDependencies(
destination: File,
baseJarName: Option[String],
providedJarNames: Seq[String],
ignoreScalaLibrary: Boolean
): Def.Initialize[Task[Unit]] =
Def.task {
Expand Down Expand Up @@ -50,7 +51,7 @@ object StdBits {
Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) {
report =>
val expectedFileNames =
report.checked.map(file => file.getName) ++ baseJarName.toSeq
report.checked.map(file => file.getName) ++ providedJarNames
for (existing <- IO.listFiles(destination)) {
if (!expectedFileNames.contains(existing.getName)) {
log.info(
Expand Down
16 changes: 8 additions & 8 deletions std-bits/base/src/main/java/org/enso/base/Text_Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.enso.base.text.CaseFoldedString.Grapheme;
import org.enso.base.text.GraphemeSpan;
import org.enso.base.text.Utf16Span;
import org.enso.polyglot.common_utils.Core_Text_Utils;

/** Utils for standard library operations on Text. */
public class Text_Utils {
Expand Down Expand Up @@ -268,14 +269,8 @@ public static long char_length(String str) {
* @param str the string to measure
* @return length of the string
*/
private static long grapheme_length(String str) {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(str);
long len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
public static long grapheme_length(String str) {
return Core_Text_Utils.computeGraphemeLength(str);
}

/** Returns a prefix of the string not exceeding the provided grapheme length. */
Expand Down Expand Up @@ -566,4 +561,9 @@ public static String replace_spans(String str, List<Utf16Span> spans, String new
sb.append(str, current_ix, str.length());
return sb.toString();
}

/**
 * Pretty prints the string, escaping special characters.
 *
 * <p>This is a thin wrapper that forwards to {@code Core_Text_Utils.prettyPrint}.
 *
 * @param str the string to pretty print
 * @return the value returned by {@code Core_Text_Utils.prettyPrint(str)}
 */
public static String pretty_print(String str) {
  final String rendered = Core_Text_Utils.prettyPrint(str);
  return rendered;
}
}
Loading

0 comments on commit 263c3ad

Please sign in to comment.