-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Factor Ident escaping from python code generation (#1251)
- Loading branch information
Showing
4 changed files
with
171 additions
and
139 deletions.
There are no files selected for viewing
116 changes: 116 additions & 0 deletions
116
core/src/main/scala/org/bykn/bosatsu/codegen/Idents.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
package org.bykn.bosatsu.codegen | ||
|
||
/**
 * Reversible escaping of arbitrary strings into identifier-safe text.
 *
 * The encoding produced by [[escape]] is:
 *   - base62 characters (`0-9`, `A-Z`, `a-z`) are emitted unchanged;
 *   - `_` is emitted as `__`;
 *   - any other character is emitted as `_<code>_`, where `<code>` is the
 *     character's UTF-16 code unit written in base62 (digits `0-9`, then
 *     `A-Z`, then `a-z`), most significant digit first.
 *
 * [[unescape]] inverts this and returns `None` for input that is not a
 * well-formed encoding.
 */
object Idents {

  // Offsets such that (digit value + offset) is the code of the base62 digit:
  // values 0-9 map to '0'-'9', 10-35 to 'A'-'Z' and 36-61 to 'a'-'z'.
  private[this] val offset0: Int = '0'.toInt
  private[this] val offsetA: Int = 'A'.toInt - 10
  private[this] val offseta: Int = 'a'.toInt - 36

  /**
   * The value (0 to 61) of a base62 digit, or -1 when `c` is not a base62
   * digit.
   */
  private[this] def digitValue(c: Char): Int =
    if (c >= '0' && c <= '9') c.toInt - offset0
    else if (c >= 'A' && c <= 'Z') c.toInt - offsetA
    else if (c >= 'a' && c <= 'z') c.toInt - offseta
    else -1

  /**
   * Append the escaped form of a single character to `bldr`.
   *
   * @param c the character to escape
   * @param bldr the builder receiving the output
   * @return `bldr`, to allow chaining
   */
  private def toBase62(c: Char, bldr: java.lang.StringBuilder): java.lang.StringBuilder =
    if (digitValue(c) >= 0) bldr.append(c)
    else if (c == '_') bldr.append("__")
    else {
      def toChar(i0: Int): Char =
        (i0 + (
          if (i0 < 10) offset0
          else if (i0 < 36) offsetA
          else offseta
        )).toChar

      // Not tail recursive, but the argument is a Char code (at most 65535)
      // so the recursion depth is at most three.
      def toString(i: Int): Unit =
        if (i < 62) {
          val _ = bldr.append(toChar(i))
        } else {
          toString(i / 62)
          val _ = bldr.append(toChar(i % 62))
        }

      bldr.append('_')
      toString(c.toInt)
      bldr.append('_')
    }

  /**
   * Escape `str` and prepend `prefix`.
   *
   * @param prefix text emitted verbatim (not escaped) before the escaped
   *   content
   * @param str the content to escape
   * @return `prefix` followed by the escaped form of `str`
   */
  def escape(prefix: String, str: CharSequence): String = {
    val bldr = new java.lang.StringBuilder
    bldr.append(prefix)
    val len = str.length
    var idx = 0
    while (idx < len) {
      toBase62(str.charAt(idx), bldr)
      idx += 1
    }
    bldr.toString()
  }

  /**
   * Decode one escape sequence whose leading `_` has already been consumed.
   *
   * @param str the escaped text
   * @param offset index of the first character after the leading `_`
   * @param bldr receives the decoded character
   * @return the number of characters consumed starting at `offset`, or -1
   *   when the sequence is unterminated, contains a character that is not a
   *   base62 digit, or encodes a value larger than `Char.MaxValue`
   */
  private def unBase62(
    str: String,
    offset: Int,
    bldr: java.lang.StringBuilder
  ): Int = {
    val len = str.length

    @scala.annotation.tailrec
    def loop(idx: Int, num: Int): Int =
      if (idx >= len) -1 // ran off the end without a closing '_'
      else {
        val c = str.charAt(idx)
        if (c == '_') {
          if (idx == offset) {
            // "__" is a literal underscore
            bldr.append('_')
          } else {
            // closing '_' of a numeric escape
            bldr.append(num.toChar)
          }
          idx + 1 - offset
        } else {
          val digit = digitValue(c)
          if (digit < 0) -1 // escape never emits non-base62 digits here
          else {
            // num <= Char.MaxValue here, so this cannot overflow Int
            val next = num * 62 + digit
            if (next > Char.MaxValue.toInt) -1
            else loop(idx + 1, next)
          }
        }
      }

    loop(offset, 0)
  }

  /**
   * Invert [[escape]].
   *
   * @param prefix the prefix that was passed to [[escape]]
   * @param str the escaped text
   * @return the original content, or `None` when `str` does not start with
   *   `prefix` or contains a malformed escape sequence
   */
  def unescape(prefix: String, str: String): Option[String] =
    if (str.startsWith(prefix)) {
      val bldr = new java.lang.StringBuilder()
      val len = str.length

      // true when the whole input was decoded, false on a malformed escape
      @scala.annotation.tailrec
      def loop(idx: Int): Boolean =
        if (idx >= len) true
        else {
          val c = str.charAt(idx)
          if (c == '_') {
            val consumed = unBase62(str, idx + 1, bldr)
            if (consumed < 1) false
            else loop(idx + 1 + consumed)
          } else {
            // this character is literally encoded
            bldr.append(c)
            loop(idx + 1)
          }
        }

      if (loop(prefix.length)) Some(bldr.toString()) else None
    } else {
      None
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
core/src/test/scala/org/bykn/bosatsu/codegen/IdentsTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package org.bykn.bosatsu.codegen | ||
import org.scalacheck.Prop.forAll | ||
|
||
/**
 * Property and example tests for the escape/unescape functions of Idents.
 */
class IdentsTest extends munit.ScalaCheckSuite {
  // Every character that may appear in escaped output: base62 digits and '_'.
  val validIdentChars =
    (('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') :+ '_').toSet

  // Escaping and then unescaping with the same prefix yields the input.
  property("Idents.escape/unescape") {
    forAll { (prefix: String, content: String) =>
      val escaped = Idents.escape(prefix, content)
      val stringNums = content.toList.map(_.toInt)
      Idents.unescape(prefix, escaped) match {
        case None =>
          fail(s"expected to unescape: $escaped, stringNums = $stringNums")
        case Some(decoded) =>
          assertEquals(decoded, content, s"escaped = $escaped, stringNums = $stringNums")
      }
    }
  }

  // The prefix is emitted verbatim at the front of the result.
  property("escape starts with prefix") {
    forAll { (prefix: String, content: String) =>
      val escaped = Idents.escape(prefix, content)
      assert(escaped.startsWith(prefix))
    }
  }

  // Everything after the prefix consists of identifier-safe characters only.
  property("escape creates validIdentChars") {
    forAll { (prefix: String, content: String) =>
      val body = Idents.escape(prefix, content).drop(prefix.length)
      assert(body.forall(validIdentChars))
    }
  }

  // Already-valid content only has its underscores doubled; anything else
  // must grow, because each escaped character takes at least three characters.
  property("valid strings are escaped with identity") {
    forAll { (prefix: String, content: String) =>
      val escaped = Idents.escape(prefix, content)
      val alreadyValid = content.forall(validIdentChars)
      if (alreadyValid) {
        assertEquals(escaped, prefix + content.replace("_", "__"))
      } else {
        assert(escaped.length > prefix.length + content.length)
      }
    }
  }

  test("some examples") {
    assertEquals(Idents.escape("foo", "bar_baz"), "foobar__baz")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters