diff --git a/build.sbt b/build.sbt
index dab2d5aeb8e93..54f3b322f2182 100644
--- a/build.sbt
+++ b/build.sbt
@@ -273,7 +273,7 @@ lazy val importer = module("importer",
 
 lazy val insight = module("insight",
   Seq(common, game, user, analyse, relation, pref, socket, round, security),
-  Seq(scalatags, breeze) ++ reactivemongo.bundle
+  Seq(scalatags, apacheMath) ++ reactivemongo.bundle
 )
 
 lazy val tutor = module("tutor",
diff --git a/modules/insight/src/main/Gaussian.scala b/modules/insight/src/main/Gaussian.scala
new file mode 100644
index 0000000000000..9bad02125aa31
--- /dev/null
+++ b/modules/insight/src/main/Gaussian.scala
@@ -0,0 +1,36 @@
+package lila.insight
+
+import math.{ Pi, log1p }
+import org.apache.commons.math3.special.Erf.{ erf, erfInv }
+import scala.math.{ sqrt, log }
+
+/** Represents a Gaussian (normal) distribution over a single real variable.
+  *
+  * @param mu
+  *   mean of the distribution
+  * @param sigma
+  *   standard deviation of the distribution
+  */
+final class Gaussian(mu: Double, sigma: Double):
+
+  // sqrt(2) is shared by the cdf and its inverse
+  private val sqrt2 = math.sqrt(2.0)
+
+  /** Draws one random sample: a standard-normal draw scaled by sigma and shifted by mu. */
+  def draw(): Double = mu + sigma * ornicar.scalalib.ThreadLocalRandom.nextGaussian()
+
+  /** Computes the inverse cdf (quantile function) of this gaussian.
+    *
+    * @param p
+    *   a probability in [0,1]
+    * @return
+    *   x s.t. cdf(x) = p
+    * @throws IllegalArgumentException
+    *   if p is outside [0,1] (a NaN p is rejected as well)
+    */
+  def inverseCdf(p: Double): Double =
+    require(p >= 0 && p <= 1, s"p must be in [0,1], got $p")
+    mu + sigma * sqrt2 * erfInv(2 * p - 1)
+
+  /** Computes the cumulative distribution function at x, i.e. P(X <= x). */
+  def cdf(x: Double): Double = 0.5 * (1 + erf((x - mu) / (sqrt2 * sigma)))
diff --git a/modules/insight/src/main/Question.scala b/modules/insight/src/main/Question.scala
index 31c25d4b983c6..6b0312cfa7524 100644
--- a/modules/insight/src/main/Question.scala
+++ b/modules/insight/src/main/Question.scala
@@ -13,16 +13,14 @@ case class Question[X](
 
 object Question:
 
-  private val peerDistribution = {
+  private val peerDistribution =
     // > db.insight.estimatedDocumentCount()
     // 277226337
     // > db.insight.aggregate([{$sample:{size:10000000}},{$project:{_id:0,mr:1}},{$match:{mr:{$exists:1}}},{$group:{_id:null,ratings:{$avg:'$mr'}}}])
     // { "_id" : null, "ratings" : 1878.1484907826184 }
     // > db.insight.aggregate([{$sample:{size:10000000}},{$project:{_id:0,mr:1}},{$match:{mr:{$exists:1}}},{$group:{_id:null,ratings:{$stdDevSamp:'$mr'}}}])
     // { "_id" : null, "ratings" : 357.42969844387625 }
-    import breeze.stats.distributions.*
-    Gaussian(1878d, 357d)(using Rand)
-  }
+    Gaussian(1878d, 357d)
 
   case class Peers(rating: MeanRating):
     lazy val ratingRange: Range =
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 563a4f8f9a2ae..21ab001735199 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -29,11 +29,11 @@ object Dependencies {
   val lettuce = "io.lettuce" % "lettuce-core" % "6.2.4.RELEASE"
   val nettyTransport =
     "io.netty" % s"netty-transport-native-$notifier" % "4.1.91.Final" classifier s"$os-$arch"
-  val munit       = "org.scalameta"              %% "munit"        % "1.0.0-M7" % Test
-  val uaparser    = "org.uaparser"               %% "uap-scala"    % "0.14.0"
-  val apacheText  = "org.apache.commons"          % "commons-text" % "1.10.0"
-  val bloomFilter = "com.github.alexandrnikitin" %% "bloom-filter" % "0.13.1_lila-1"
-  val breeze      = "org.scalanlp"               %% "breeze"       % "2.1.0"
+  val munit       = "org.scalameta"              %% "munit"         % "1.0.0-M7" % Test
+  val uaparser    = "org.uaparser"               %% "uap-scala"     % "0.14.0"
+  val apacheText  = "org.apache.commons"          % "commons-text"  % "1.10.0"
+  val apacheMath  = "org.apache.commons"          % "commons-math3" % "3.6.1"
+  val bloomFilter = "com.github.alexandrnikitin" %% "bloom-filter"  % "0.13.1_lila-1"
 
   object tests {
     val bundle = Seq(munit)