From 26c704cb56758b2ef7a7f872e7fc53a444ca841a Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Fri, 27 Sep 2019 10:51:42 -0400 Subject: [PATCH 1/3] Refactor Char and String generators This is part of work on Gen performance improvements. --- src/main/scala/org/scalacheck/Arbitrary.scala | 19 ++-- src/main/scala/org/scalacheck/Gen.scala | 90 +++++++++++++------ 2 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/main/scala/org/scalacheck/Arbitrary.scala b/src/main/scala/org/scalacheck/Arbitrary.scala index 69b45bb91..5ccbd2828 100644 --- a/src/main/scala/org/scalacheck/Arbitrary.scala +++ b/src/main/scala/org/scalacheck/Arbitrary.scala @@ -121,16 +121,13 @@ private[scalacheck] sealed trait ArbitraryLowPriority { /** Arbitrary instance of Char */ implicit lazy val arbChar: Arbitrary[Char] = Arbitrary { - // exclude 0xFFFE due to this bug: https://bit.ly/2pTpAu8 - // also exclude 0xFFFF as it is not unicode: http://bit.ly/2cVBrzK - val validRangesInclusive = List[(Char, Char)]( - (0x0000, 0xD7FF), - (0xE000, 0xFFFD) - ) - - Gen.frequency((validRangesInclusive.map { - case (first, last) => (last + 1 - first, Gen.choose[Char](first, last)) - }: List[(Int, Gen[Char])]): _*) + // valid ranges are [0x0000, 0xD7FF] and [0xE000, 0xFFFD]. + // + // ((0xFFFD + 1) - 0xE000) + ((0xD7FF + 1) - 0x0000) + choose(0, 63486).map { i => + if (i <= 0xD7FF) i.toChar + else (i + 2048).toChar + } } /** Arbitrary instance of Byte */ @@ -149,7 +146,7 @@ private[scalacheck] sealed trait ArbitraryLowPriority { /** Arbitrary instance of String */ implicit lazy val arbString: Arbitrary[String] = - Arbitrary(arbitrary[List[Char]] map (_.mkString)) + Arbitrary(Gen.stringOf(arbitrary[Char])) /** Arbitrary instance of Date */ implicit lazy val arbDate: Arbitrary[java.util.Date] = diff --git a/src/main/scala/org/scalacheck/Gen.scala b/src/main/scala/org/scalacheck/Gen.scala index 1e522e781..b7d40725f 100644 --- a/src/main/scala/org/scalacheck/Gen.scala +++ b/src/main/scala/org/scalacheck/Gen.scala @@ -823,86 +823,120 @@ object Gen extends GenArities with GenVersionSpecific { //// Character Generators //// + private def charSample(cs: Array[Char]): Gen[Char] = + new Gen[Char] { + def doApply(p: P, seed0: Seed): Gen.R[Char] = { + val (x, seed1) = seed0.long + val i = ((x & Long.MaxValue) % cs.length).toInt + r(Some(cs(i)), seed1) + } + } + /** Generates a numerical character */ val numChar: Gen[Char] = - choose(48.toChar, 57.toChar) + charSample(('0' to '9').toArray) /** Generates an upper-case alpha character */ val alphaUpperChar: Gen[Char] = - choose(65.toChar, 90.toChar) + charSample(('A' to 'Z').toArray) /** Generates a lower-case alpha character */ val alphaLowerChar: Gen[Char] = - choose(97.toChar, 122.toChar) + charSample(('a' to 'z').toArray) /** Generates an alpha character */ - val alphaChar: Gen[Char] = - frequency((1,alphaUpperChar), (9,alphaLowerChar)) + val alphaChar = + charSample((('A' to 'Z') ++ ('a' to 'z')).toArray) /** Generates an alphanumerical character */ - val alphaNumChar: Gen[Char] = - frequency((1,numChar), (9,alphaChar)) + val alphaNumChar = + charSample((('0' to '9') ++ ('A' to 'Z') ++ ('a' to 'z')).toArray) /** Generates a ASCII character, with extra weighting for printable characters */ val asciiChar: Gen[Char] = - chooseNum(0, 127, 32 to 126:_*).map(_.toChar) + charSample((0.toChar to 127.toChar).toArray) /** Generates a ASCII printable character */ val asciiPrintableChar: Gen[Char] = - choose(32.toChar, 126.toChar) + charSample((32.toChar to 126.toChar).toArray) /** Generates a character that can represent a valid hexadecimal digit. This * includes both upper and lower case values. */ val hexChar: Gen[Char] = - Gen.oneOf( - Gen.oneOf("0123456789abcdef".toSeq), - Gen.oneOf("0123456789ABCDEF".toSeq) - ) + charSample("0123456789abcdef0123456789ABCDEF".toArray) //// String Generators //// + @tailrec private def mkString(n: Int, sb: StringBuilder, gc: Gen[Char], p: P, seed0: Seed): R[String] = + if (n <= 0) { + r(Some(sb.toString), seed0) + } else { + val res = gc.doApply(p, seed0) + res.retrieve match { + case Some(c) => + sb += c + case None => + () + } + mkString(n - 1, sb, gc, p, res.seed) + } + + def stringOfN(n: Int, gc: Gen[Char]): Gen[String] = + gen { (p, seed) => + mkString(n, new StringBuilder, gc, p, seed) + } + + def stringOf(gc: Gen[Char]): Gen[String] = + gen { (p, seed0) => + val (n, seed1) = Gen.mkSize(p, seed0) + mkString(n, new StringBuilder, gc, p, seed1) + } + /** Generates a string that starts with a lower-case alpha character, * and only contains alphanumerical characters */ val identifier: Gen[String] = - for { - c <- alphaLowerChar - cs <- listOf(alphaNumChar) - } yield (c::cs).mkString + gen { (p, seed0) => + val (n, seed1) = Gen.mkSize(p, seed0) + val sb = new StringBuilder + val res1 = alphaLowerChar.doApply(p, seed1) + sb += res1.retrieve.get + mkString(n - 1, sb, alphaNumChar, p, res1.seed) + } /** Generates a string of digits */ val numStr: Gen[String] = - listOf(numChar).map(_.mkString) + stringOf(numChar) /** Generates a string of upper-case alpha characters */ val alphaUpperStr: Gen[String] = - listOf(alphaUpperChar).map(_.mkString) + stringOf(alphaUpperChar) /** Generates a string of lower-case alpha characters */ val alphaLowerStr: Gen[String] = - listOf(alphaLowerChar).map(_.mkString) + stringOf(alphaLowerChar) /** Generates a string of alpha characters */ val alphaStr: Gen[String] = - listOf(alphaChar).map(_.mkString) + stringOf(alphaChar) /** Generates a string of alphanumerical characters */ val alphaNumStr: Gen[String] = - listOf(alphaNumChar).map(_.mkString) + stringOf(alphaNumChar) /** Generates a string of ASCII characters, with extra weighting for printable characters */ val asciiStr: Gen[String] = - listOf(asciiChar).map(_.mkString) + stringOf(asciiChar) /** Generates a string of ASCII printable characters */ val asciiPrintableStr: Gen[String] = - listOf(asciiPrintableChar).map(_.mkString) + stringOf(asciiPrintableChar) /** Generates a string that can represent a valid hexadecimal digit. This * includes both upper and lower case values. */ val hexStr: Gen[String] = - listOf(hexChar).map(_.mkString) + stringOf(hexChar) //// Number Generators //// @@ -1041,6 +1075,12 @@ object Gen extends GenArities with GenVersionSpecific { 1 -> const(Duration.Zero), 6 -> finiteDuration) + private def mkSize(p: Gen.Parameters, seed0: Seed): (Int, Seed) = { + val maxSize = Integer.max(p.size + 1, 1) + val (x, seed1) = seed0.long + ((x % maxSize).toInt, seed1) + } + // used to calculate how many per-item retries we should allow. private def collectionRetries(n: Int): Int = Integer.max(10, n / 10) From 31c438a529d344d3f9b7bf203cc7f96af187b27d Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Sat, 28 Sep 2019 17:18:48 -0400 Subject: [PATCH 2/3] Respond to Oscar's review comments. In particular, this adds retrying to the infiniteStream, stringOf, and stringOfN combinators. It also slightly optimizes the loop conditions on buildableOfN, removes lazy from most arbitrary definitions, and cleans up a few other things. This commit does appear to have made some benchmarks slower, although it's possible my machine is just more busy than it was. I've also added a few more benchmarks. --- src/main/scala/org/scalacheck/Gen.scala | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/main/scala/org/scalacheck/Gen.scala b/src/main/scala/org/scalacheck/Gen.scala index b7d40725f..1fdc2583c 100644 --- a/src/main/scala/org/scalacheck/Gen.scala +++ b/src/main/scala/org/scalacheck/Gen.scala @@ -868,19 +868,25 @@ object Gen extends GenArities with GenVersionSpecific { //// String Generators //// - @tailrec private def mkString(n: Int, sb: StringBuilder, gc: Gen[Char], p: P, seed0: Seed): R[String] = - if (n <= 0) { - r(Some(sb.toString), seed0) - } else { - val res = gc.doApply(p, seed0) + private def mkString(n: Int, sb: StringBuilder, gc: Gen[Char], p: P, seed0: Seed): R[String] = { + var seed: Seed = seed0 + val allowedFailures = Gen.collectionRetries(n) + var failures = 0 + var count = 0 + while (count < n) { + val res = gc.doApply(p, seed) res.retrieve match { case Some(c) => sb += c + count += 1 case None => - () + failures += 1 + if (failures >= allowedFailures) return r(None, res.seed) } - mkString(n - 1, sb, gc, p, res.seed) + seed = res.seed } + r(Some(sb.toString), seed) + } def stringOfN(n: Int, gc: Gen[Char]): Gen[String] = gen { (p, seed) => @@ -1075,10 +1081,11 @@ object Gen extends GenArities with GenVersionSpecific { 1 -> const(Duration.Zero), 6 -> finiteDuration) + // used to compute a uniformly-distributed size private def mkSize(p: Gen.Parameters, seed0: Seed): (Int, Seed) = { val maxSize = Integer.max(p.size + 1, 1) val (x, seed1) = seed0.long - ((x % maxSize).toInt, seed1) + (((x & Long.MaxValue) % maxSize).toInt, seed1) } // used to calculate how many per-item retries we should allow. From 03ee05e8f4c146a9f6db67367cd70e2326e983b8 Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Sat, 28 Sep 2019 17:30:02 -0400 Subject: [PATCH 3/3] Fix some bugs with manually-defined doApply methods. --- src/main/scala/org/scalacheck/Gen.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/scala/org/scalacheck/Gen.scala b/src/main/scala/org/scalacheck/Gen.scala index 1fdc2583c..3e997daf7 100644 --- a/src/main/scala/org/scalacheck/Gen.scala +++ b/src/main/scala/org/scalacheck/Gen.scala @@ -826,9 +826,10 @@ object Gen extends GenArities with GenVersionSpecific { private def charSample(cs: Array[Char]): Gen[Char] = new Gen[Char] { def doApply(p: P, seed0: Seed): Gen.R[Char] = { - val (x, seed1) = seed0.long + val seed1 = p.initialSeed.getOrElse(seed0) + val (x, seed2) = seed1.long val i = ((x & Long.MaxValue) % cs.length).toInt - r(Some(cs(i)), seed1) + r(Some(cs(i)), seed2) } } @@ -845,11 +846,11 @@ object Gen extends GenArities with GenVersionSpecific { charSample(('a' to 'z').toArray) /** Generates an alpha character */ - val alphaChar = + val alphaChar: Gen[Char] = charSample((('A' to 'Z') ++ ('a' to 'z')).toArray) /** Generates an alphanumerical character */ - val alphaNumChar = + val alphaNumChar: Gen[Char] = charSample((('0' to '9') ++ ('A' to 'Z') ++ ('a' to 'z')).toArray) /** Generates a ASCII character, with extra weighting for printable characters */