Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a common-polyglot-core-utils project #5855

Merged
merged 6 commits into from
Mar 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime"))
}.evaluated,
Benchmark / parallelExecution := false
)
.dependsOn(`common-polyglot-core-utils`)
.dependsOn(`runtime-language-epb`)
.dependsOn(`edition-updater`)
.dependsOn(`interpreter-dsl`)
Expand Down Expand Up @@ -2025,22 +2026,37 @@ lazy val `std-base` = project
Compile / packageBin / artifactPath :=
`base-polyglot-root` / "std-base.jar",
libraryDependencies ++= Seq(
"com.ibm.icu" % "icu4j" % icuVersion,
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
"org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided"
),
Compile / packageBin := Def.task {
val result = (Compile / packageBin).value
val _ensureCoreIsCompiled =
(`common-polyglot-core-utils` / Compile / packageBin).value
val _ = StdBits
.copyDependencies(
`base-polyglot-root`,
Some("std-base.jar"),
Seq("std-base.jar", "common-polyglot-core-utils.jar"),
ignoreScalaLibrary = true
)
.value
result
}.value
)
.dependsOn(`common-polyglot-core-utils`)

/** Java utilities shared between the engine runtime and the standard
  * libraries; the `runtime` and `std-base` projects both depend on it.
  */
lazy val `common-polyglot-core-utils` =
  (project in file("lib/scala/common-polyglot-core-utils"))
    .settings(
      frgaalJavaCompilerSetting,
      // Do not add the Scala standard library as a dependency of this project.
      autoScalaLibrary := false,
      // Write the packaged JAR straight into the `base-polyglot-root` directory.
      Compile / packageBin / artifactPath :=
        `base-polyglot-root` / "common-polyglot-core-utils.jar",
      libraryDependencies ++= Seq(
        "com.ibm.icu"         % "icu4j"       % icuVersion,
        "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided"
      )
    )

lazy val `enso-test-java-helpers` = project
.in(file("test/Tests/polyglot-sources/enso-test-java-helpers"))
Expand Down Expand Up @@ -2093,7 +2109,7 @@ lazy val `std-table` = project
val _ = StdBits
.copyDependencies(
`table-polyglot-root`,
Some("std-table.jar"),
Seq("std-table.jar"),
ignoreScalaLibrary = true
)
.value
Expand All @@ -2118,7 +2134,7 @@ lazy val `std-image` = project
val _ = StdBits
.copyDependencies(
`image-polyglot-root`,
Some("std-image.jar"),
Seq("std-image.jar"),
ignoreScalaLibrary = true
)
.value
Expand All @@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project
val _ = StdBits
.copyDependencies(
`google-api-polyglot-root`,
Some("std-google-api.jar"),
Seq("std-google-api.jar"),
ignoreScalaLibrary = true
)
.value
Expand Down Expand Up @@ -2172,7 +2188,7 @@ lazy val `std-database` = project
val _ = StdBits
.copyDependencies(
`database-polyglot-root`,
Some("std-database.jar"),
Seq("std-database.jar"),
ignoreScalaLibrary = true
)
.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.polyglot.common_utils.Core_Date_Utils;

@ExportLibrary(InteropLibrary.class)
@ExportLibrary(TypesLibrary.class)
Expand Down Expand Up @@ -71,12 +71,7 @@ public static EnsoDateTime now() {
@Builtin.WrapException(from = DateTimeParseException.class)
@CompilerDirectives.TruffleBoundary
public static EnsoDateTime parse(String text) {
String iso = text;
if (text != null && text.length() > 10 && text.charAt(10) == ' ') {
var builder = new StringBuilder(iso);
builder.replace(10, 11, "T");
iso = builder.toString();
}
String iso = Core_Date_Utils.normaliseISODateTime(text);

var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from);
if (datetime instanceof ZonedDateTime zdt) {
Expand Down Expand Up @@ -273,17 +268,5 @@ public Object toDisplayString(boolean allowSideEffects) {
private static final EnsoDateTime epochStart =
EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC"));

private static final DateTimeFormatter DATE_TIME_FORMATTER =
new DateTimeFormatterBuilder()
.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.optionalStart()
.parseLenient()
.appendOffsetId()
.optionalEnd()
.optionalStart()
.appendLiteral('[')
.parseCaseSensitive()
.appendZoneRegionId()
.appendLiteral(']')
.toFormatter();
private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter();
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.enso.interpreter.runtime.data.text;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Normalizer2;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.dsl.Cached;
Expand All @@ -19,6 +18,7 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.enso.interpreter.dsl.Builtin;
import org.enso.polyglot.common_utils.Core_Text_Utils;

/** The main runtime type for Enso's Text. */
@ExportLibrary(InteropLibrary.class)
Expand Down Expand Up @@ -178,13 +178,7 @@ String asString(@Cached("build()") @Cached.Shared("strings") ToJavaStringNode to

@CompilerDirectives.TruffleBoundary
private int computeLength() {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(toString());
int len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
return Core_Text_Utils.computeGraphemeLength(toString());
}

@CompilerDirectives.TruffleBoundary
Expand All @@ -193,37 +187,7 @@ String toDisplayString(
boolean allowSideEffects,
@Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) {
String str = toJavaStringNode.execute(this);
int len = str.length();
int outputLength = len + 2; // Precise if there are no special characters.

// TODO This should be more extensible; while it's still a small fixed set,
// a switch is probably fastest (unconfirmed)

StringBuffer strBuf = new StringBuffer(outputLength);

strBuf.append('\'');

for (int i = 0; i < len; ++i) {
char c = str.charAt(i);
switch (c) {
case '\\' -> strBuf.append("\\\\");
case '\'' -> strBuf.append("\\'");
case '\n' -> strBuf.append("\\n");
case '\t' -> strBuf.append("\\t");
case '\0' -> strBuf.append("\\0");
case '\u0007' -> strBuf.append("\\a");
case '\u0008' -> strBuf.append("\\b");
case '\u000c' -> strBuf.append("\\f");
case '\r' -> strBuf.append("\\r");
case '\u000B' -> strBuf.append("\\v");
case '\u001B' -> strBuf.append("\\e");
default -> strBuf.append(c);
}
}

strBuf.append('\'');

return strBuf.toString();
return Core_Text_Utils.prettyPrint(str);
}

@ExportMessage
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.enso.polyglot.common_utils;

import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;

public class Core_Date_Utils {
/**
* Replace space with T in ISO date time string to make it compatible with ISO format.
* @param dateString Raw date time string with either space or T as separator
* @return ISO format date time string
*/
public static String normaliseISODateTime(String dateString) {
if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') {
var builder = new StringBuilder(dateString);
builder.replace(10, 11, "T");
return builder.toString();
}

return dateString;
}

/** @return default Date Time formatter for parsing a Date_Time. */
public static DateTimeFormatter defaultZonedDateTimeFormatter() {
return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.optionalStart().parseLenient().appendOffsetId().optionalEnd()
.optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']')
.toFormatter();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.enso.polyglot.common_utils;

import com.ibm.icu.text.BreakIterator;

public class Core_Text_Utils {
  /**
   * Computes the length of the string as the number of grapheme clusters it contains.
   *
   * @param text the string to measure
   * @return how many times the character break iterator advances before reporting
   *     {@code BreakIterator.DONE}
   */
  public static int computeGraphemeLength(String text) {
    BreakIterator graphemes = BreakIterator.getCharacterInstance();
    graphemes.setText(text);

    int count = 0;
    for (int boundary = graphemes.next();
        boundary != BreakIterator.DONE;
        boundary = graphemes.next()) {
      count++;
    }
    return count;
  }

  /**
   * Pretty prints the string, escaping special characters.
   *
   * <p>The result is wrapped in single quotes. Backslash, single quote and a fixed set of control
   * characters are replaced by two-character backslash escapes; every other character is copied
   * through unchanged.
   *
   * @param str the text to render; must not be {@code null}
   * @return the quoted and escaped representation of {@code str}
   */
  public static String prettyPrint(String str) {
    int len = str.length();
    // Room for the two quotes; exact when nothing needs escaping.
    StringBuilder out = new StringBuilder(len + 2);

    out.append('\'');
    for (int i = 0; i < len; ++i) {
      char c = str.charAt(i);
      String escaped = escapeSequenceFor(c);
      if (escaped == null) {
        out.append(c);
      } else {
        out.append(escaped);
      }
    }
    out.append('\'');

    return out.toString();
  }

  /**
   * Looks up the escape sequence for a single character.
   *
   * <p>TODO This should be more extensible; while it's still a small fixed set, a switch is
   * probably fastest (unconfirmed).
   *
   * @param c the character to look up
   * @return the escape sequence, or {@code null} when the character is emitted as-is
   */
  private static String escapeSequenceFor(char c) {
    return switch (c) {
      case '\\' -> "\\\\";
      case '\'' -> "\\'";
      case '\n' -> "\\n";
      case '\t' -> "\\t";
      case '\0' -> "\\0";
      case '\u0007' -> "\\a";
      case '\u0008' -> "\\b";
      case '\u000c' -> "\\f";
      case '\r' -> "\\r";
      case '\u000B' -> "\\v";
      case '\u001B' -> "\\e";
      default -> null;
    };
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* This package contains common utilities which can be used both by the engine runtime and the libraries.
* <p>
* This allows us to avoid duplicating code between the runtime and library projects for operations that need to be
* accessible on both sides.
* <p>
* The utilities that belong here are mostly operations that are builtins of the Enso language but also need to be used
* from our Java libraries where the cost of calling back to Enso methods is relatively high, so accessing the Java
* implementations directly is desirable. The primary example of that is the algorithm for computing the length of a
* string by counting the grapheme clusters.
* <p>
* Due to classpath separation, the class files of this package will be duplicated: one copy is embedded in the engine
* and another is shipped as {@code common-polyglot-core-utils.jar}, placed in the {@code polyglot} directory of the
* Standard.Base library. In exchange, the source code itself is not duplicated, so we have a single source of truth
* for each implementation.
* <p>
* Due to the copying, the project should not be expanded too much, but all utilities which would end up being
* duplicated are best moved here.
*/
package org.enso.polyglot.common_utils;
11 changes: 6 additions & 5 deletions project/StdBits.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@ object StdBits {
* directory.
*
* @param destination location where to put the dependencies
* @param baseJarName name of the base generated JAR (if any); unexpected
* (old) files are removed, so this task needs to know
* this file's name to avoid removing it
* @param providedJarNames name of JARs generated by the local projects;
* unexpected (old) files are removed, so this task
* needs to know these files' names to avoid removing
* them
* @param ignoreScalaLibrary whether to ignore Scala dependencies that are
* added by default by SBT and are not relevant in
* pure-Java projects
*/
def copyDependencies(
destination: File,
baseJarName: Option[String],
providedJarNames: Seq[String],
ignoreScalaLibrary: Boolean
): Def.Initialize[Task[Unit]] =
Def.task {
Expand Down Expand Up @@ -50,7 +51,7 @@ object StdBits {
Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) {
report =>
val expectedFileNames =
report.checked.map(file => file.getName) ++ baseJarName.toSeq
report.checked.map(file => file.getName) ++ providedJarNames
for (existing <- IO.listFiles(destination)) {
if (!expectedFileNames.contains(existing.getName)) {
log.info(
Expand Down
16 changes: 8 additions & 8 deletions std-bits/base/src/main/java/org/enso/base/Text_Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.enso.base.text.CaseFoldedString.Grapheme;
import org.enso.base.text.GraphemeSpan;
import org.enso.base.text.Utf16Span;
import org.enso.polyglot.common_utils.Core_Text_Utils;

/** Utils for standard library operations on Text. */
public class Text_Utils {
Expand Down Expand Up @@ -268,14 +269,8 @@ public static long char_length(String str) {
* @param str the string to measure
* @return length of the string
*/
private static long grapheme_length(String str) {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(str);
long len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
public static long grapheme_length(String str) {
return Core_Text_Utils.computeGraphemeLength(str);
}

/** Returns a prefix of the string not exceeding the provided grapheme length. */
Expand Down Expand Up @@ -566,4 +561,9 @@ public static String replace_spans(String str, List<Utf16Span> spans, String new
sb.append(str, current_ix, str.length());
return sb.toString();
}

/**
 * Pretty prints the string, escaping special characters.
 *
 * <p>This method only forwards to {@code Core_Text_Utils.prettyPrint}; the formatting itself is
 * implemented there.
 *
 * @param str the string to pretty print
 * @return the value returned by {@code Core_Text_Utils.prettyPrint(str)}
 */
public static String pretty_print(String str) {
return Core_Text_Utils.prettyPrint(str);
}
}
Loading