diff --git a/core/src/main/scala/org/bykn/bosatsu/codegen/Idents.scala b/core/src/main/scala/org/bykn/bosatsu/codegen/Idents.scala new file mode 100644 index 000000000..491b7d1d2 --- /dev/null +++ b/core/src/main/scala/org/bykn/bosatsu/codegen/Idents.scala @@ -0,0 +1,116 @@ +package org.bykn.bosatsu.codegen + +object Idents { + + private[this] val base62Items = + (('0' to '9') ++ ('A' to 'Z') ++ ('a' to 'z')).toSet + + private[this] val offset0: Int = '0'.toInt + private[this] val offsetA: Int = 'A'.toInt - 10 + private[this] val offseta: Int = 'a'.toInt - 36 + + private def toBase62(c: Char, bldr: java.lang.StringBuilder): java.lang.StringBuilder = + if (base62Items(c)) bldr.append(c) + else if (c == '_') bldr.append("__") + else { + def toChar(i0: Int): Char = + (i0 + ( + if (i0 < 36) { + if (i0 < 10) offset0 + else offsetA + } + else offseta + )).toChar + + def toString(i: Int): Unit = + if (i < 62) { + val _ = bldr.append(toChar(i)) + } + else { + val i1 = i / 62 + val i0 = i % 62 + // this isn't tail recursion, but it's okay + // because the int can't be that big so we can + // only divide by 62 a few times + toString(i1) + val _ = bldr.append(toChar(i0)) + } + + bldr.append('_') + toString(c.toInt) + bldr.append('_') + } + + def escape(prefix: String, str: CharSequence): String = { + val bldr = new java.lang.StringBuilder + var idx = 0 + val len = str.length + bldr.append(prefix) + while (idx < len) { + toBase62(str.charAt(idx), bldr) + idx += 1 + } + bldr.toString() + } + + private def unBase62( + str: String, + offset: Int, + bldr: java.lang.StringBuilder + ): Int = { + var idx = offset + var num = 0 + + while (idx < str.length) { + val c = str.charAt(idx) + idx += 1 + if (c == '_') { + if (idx == offset + 1) { + // this is a literal _ + bldr.append('_') + } + else { + // done, this is the trailing _ + bldr.append(num.toChar) + } + return (idx - offset) + } else { + val base = + if (c <= 'Z') { + if (c <= '9') offset0 + else offsetA + } + else offseta + + num = num * 62 + c.toInt - base + } + } + return -1 + } + + def unescape(prefix: String, str: String): Option[String] = + if (str.startsWith(prefix)) { + val bldr = new java.lang.StringBuilder() + var idx = prefix.length + val len = str.length + while (idx < len) { + val c = str.charAt(idx) + idx += 1 + if (c == '_') { + val res = unBase62(str, idx, bldr) + if (res < 1) return None + else { + // this tells us how many characters we read + idx += res + } + } else { + // this character is literally encoded + bldr.append(c) + } + } + + Some(bldr.toString()) + } else { + None + } +} \ No newline at end of file diff --git a/core/src/main/scala/org/bykn/bosatsu/codegen/python/PythonGen.scala b/core/src/main/scala/org/bykn/bosatsu/codegen/python/PythonGen.scala index 8b10131f2..459a530e5 100644 --- a/core/src/main/scala/org/bykn/bosatsu/codegen/python/PythonGen.scala +++ b/core/src/main/scala/org/bykn/bosatsu/codegen/python/PythonGen.scala @@ -11,6 +11,7 @@ import org.bykn.bosatsu.{ Parser, RecursionKind } +import org.bykn.bosatsu.codegen.Idents import org.bykn.bosatsu.rankn.Type import org.typelevel.paiges.Doc @@ -69,7 +70,7 @@ object PythonGen { def bind(b: Bindable): (EnvState, Code.Ident) = bindInc(b, 1) { c => - Code.Ident(escapeRaw("___b", b.asString + c.toString)) + Code.Ident(Idents.escape("___b", b.asString + c.toString)) } // in loops we need to substitute @@ -114,7 +115,7 @@ object PythonGen { case None => val impNumber = imports.size val alias = Code.Ident( - escapeRaw("___i", mod.last.name + impNumber.toString) + Idents.escape("___i", mod.last.name + impNumber.toString) ) (copy(imports = imports.updated(mod, alias)), alias) } @@ -464,75 +465,6 @@ object PythonGen { } - private[this] val base62Items = - (('0' to '9') ++ ('A' to 'Z') ++ ('a' to 'z')).toSet - - private def toBase62(c: Char): String = - if (base62Items(c)) c.toString - else if (c == '_') "__" - else { - def toChar(i0: Int): Char = - if (i0 < 0) { - // $COVERAGE-OFF$ - sys.error(s"invalid in: $i0") - // $COVERAGE-ON$ - } else if (i0 < 10) (i0 + '0'.toInt).toChar - else if (i0 < 36) (i0 - 10 + 'A'.toInt).toChar - else if (i0 < 62) (i0 - 36 + 'a'.toInt).toChar - else { - // $COVERAGE-OFF$ - sys.error(s"invalid int: $i0") - // $COVERAGE-ON$ - } - - def toString(i: Int): String = - if (i < 62) toChar(i).toString - else { - val i0 = i % 62 - val i1 = i / 62 - toString(i1) + toChar(i0) - } - - "_" + toString(c.toInt) + "_" - } - - private def escapeRaw(prefix: String, str: String): String = - str.map(toBase62).mkString(prefix, "", "") - - private def unBase62( - str: String, - offset: Int, - bldr: java.lang.StringBuilder - ): Int = { - var idx = offset - var num = 0 - - while (idx < str.length) { - val c = str.charAt(idx) - idx += 1 - if (c == '_') { - if (idx != offset + 1) { - // done - val numC = num.toChar - bldr.append(numC) - return (idx - offset) - } else { - // "__" decodes to "_" - bldr.append('_') - return (idx - offset) - } - } else { - val base = - if (c <= '9') '0'.toInt - else if (c <= 'Z') ('A'.toInt - 10) - else ('a'.toInt - 36) - - num = num * 62 + c.toInt - base - } - } - return -1 - } - // we escape by prefixing by three underscores, ___ and n (for name) // we use other ___x escapes for different name spaces, e.g. tmps, and anons // then we escape _ by __ and any character outside the allowed @@ -551,7 +483,7 @@ object PythonGen { ) Code.Ident(str) else { // we need to escape - Code.Ident(escapeRaw("___n", str)) + Code.Ident(Idents.escape("___n", str)) } } @@ -562,41 +494,9 @@ object PythonGen { ) Code.Ident(str) else { // we need to escape - Code.Ident(escapeRaw("___m", str)) - } - - def unescape(ident: Code.Ident): Option[Bindable] = { - val str = ident.name - val res = if (str.startsWith("___n")) { - val bldr = new java.lang.StringBuilder() - var idx = 4 - while (idx < str.length) { - val c = str.charAt(idx) - idx += 1 - if (c == '_') { - val res = unBase62(str, idx, bldr) - if (res < 1) return None - else { - idx += res - } - } else { - bldr.append(c) - } - } - - bldr.toString() - } else { - str + Code.Ident(Idents.escape("___m", str)) } - if (str.isEmpty) None - else { - Identifier - .optionParse(Identifier.bindableParser, res) - .orElse(Some(Identifier.Backticked(res))) - } - } - /** Remap is used to handle remapping external values */ private def apply(packName: PackageName, name: Bindable, me: Expr)( diff --git a/core/src/test/scala/org/bykn/bosatsu/codegen/IdentsTest.scala b/core/src/test/scala/org/bykn/bosatsu/codegen/IdentsTest.scala new file mode 100644 index 000000000..f856abe3f --- /dev/null +++ b/core/src/test/scala/org/bykn/bosatsu/codegen/IdentsTest.scala @@ -0,0 +1,50 @@ +package org.bykn.bosatsu.codegen +import org.scalacheck.Prop.forAll + +class IdentsTest extends munit.ScalaCheckSuite { + val validIdentChars = + (('0' to '9') ++ ('A' to 'Z') ++ ('a' to 'z')).toSet + '_' + + property("Idents.escape/unescape") { + forAll { (prefix: String, content: String) => + val escaped = Idents.escape(prefix, content) + val stringNums = content.map(_.toInt).toList + Idents.unescape(prefix, escaped) match { + case Some(c1) => assertEquals(c1, content, s"escaped = $escaped, stringNums = $stringNums") + case None => fail(s"expected to unescape: $escaped, stringNums = $stringNums") + } + } + } + + property("escape starts with prefix") { + forAll { (prefix: String, content: String) => + assert(Idents.escape(prefix, content).startsWith(prefix)) + } + } + + property("escape creates validIdentChars") { + forAll { (prefix: String, content: String) => + val escaped = Idents.escape(prefix, content) + assert(escaped.drop(prefix.length).forall(validIdentChars)) + } + } + + property("valid strings are escaped with identity") { + forAll { (prefix: String, content: String) => + val escaped = Idents.escape(prefix, content) + if (content.forall(validIdentChars)) { + assertEquals(escaped, prefix + content.flatMap { + case '_' => "__" + case a => a.toString + }) + } + else { + assert(escaped.length > (prefix + content).length) + } + } + } + + test("some examples") { + assertEquals(Idents.escape("foo", "bar_baz"), "foobar__baz") + } +} \ No newline at end of file diff --git a/core/src/test/scala/org/bykn/bosatsu/codegen/python/PythonGenTest.scala b/core/src/test/scala/org/bykn/bosatsu/codegen/python/PythonGenTest.scala index a00b62edd..be90970ae 100644 --- a/core/src/test/scala/org/bykn/bosatsu/codegen/python/PythonGenTest.scala +++ b/core/src/test/scala/org/bykn/bosatsu/codegen/python/PythonGenTest.scala @@ -1,6 +1,5 @@ package org.bykn.bosatsu.codegen.python -import org.bykn.bosatsu.Identifier.{Bindable, unsafeBindable} import org.bykn.bosatsu.Generators.bindIdentGen import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks.{ forAll, @@ -14,25 +13,6 @@ class PythonGenTest extends AnyFunSuite { PropertyCheckConfiguration(minSuccessful = 5000) // PropertyCheckConfiguration(minSuccessful = 500) - test("PythonGen.escape round trips") { - - def law(b: Bindable) = { - val ident = PythonGen.escape(b) - PythonGen.unescape(ident) match { - case Some(b1) => assert(b1.asString == b.asString) - case None => assert(false, s"$b => $ident could not round trip") - } - } - - forAll(bindIdentGen)(law(_)) - - val examples: List[Bindable] = - List("`12 =_=`", "`N`").map(unsafeBindable) - - examples.foreach(law(_)) - - } - val PythonName = "[_A-Za-z][_A-Za-z0-9]*".r.pattern test("all escapes are valid python identifiers") { @@ -46,20 +26,6 @@ class PythonGenTest extends AnyFunSuite { } } - test("if unescape works, escape would round trip") { - forAll { (s: String) => - if (Code.python2Name.matcher(s).matches) { - val ident = Code.Ident(s) - PythonGen.unescape(ident) match { - case Some(b) => - assert(PythonGen.escape(b) == ident) - case None => - () - } - } - } - } - test("we can parse an example externals file") { val extstr = """ { IO: { foo: bar.baz, quux: quux.quux_impl }, Bop: { foo: collections.queue } }