Skip to content

Commit

Permalink
replace 10MB breeze jar with simple gaussian implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
ornicar committed Apr 21, 2023
1 parent c5d96a7 commit bd7cc02
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 10 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ lazy val importer = module("importer",

lazy val insight = module("insight",
Seq(common, game, user, analyse, relation, pref, socket, round, security),
Seq(scalatags, breeze) ++ reactivemongo.bundle
Seq(scalatags, apacheMath) ++ reactivemongo.bundle
)

lazy val tutor = module("tutor",
Expand Down
30 changes: 30 additions & 0 deletions modules/insight/src/main/Gaussian.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package lila.insight

import math.{ Pi, log1p }
import org.apache.commons.math3.special.Erf.{ erf, erfInv }
import scala.math.{ sqrt, log }

/** A Gaussian (normal) distribution over a single real variable.
  *
  * @param mu
  *   location parameter: the value at which [[cdf]] equals 0.5
  * @param sigma
  *   scale parameter (standard deviation). It is not validated here; the formulas below divide by it, so callers
  *   are expected to pass a non-zero value.
  */
final class Gaussian(mu: Double, sigma: Double):

  // sqrt(2) is the scaling factor between the error function and the normal cdf; computed once per instance.
  private val sqrt2 = math.sqrt(2.0)

  /** Draws a random sample by shifting and scaling the value returned by `ThreadLocalRandom.nextGaussian()`.
    *
    * NOTE(review): assumes `nextGaussian()` yields a standard normal deviate (mean 0, deviation 1) — confirm
    * against scalalib.
    */
  def draw(): Double = mu + sigma * ornicar.scalalib.ThreadLocalRandom.nextGaussian()

  /** Computes the inverse cdf (quantile function) of this gaussian.
    *
    * @param p
    *   a probability in [0,1]
    * @return
    *   x s.t. cdf(x) = p
    * @throws java.lang.IllegalArgumentException
    *   if `p` is outside [0,1]; NaN is rejected as well because both comparisons are false for it
    */
  def inverseCdf(p: Double): Double =
    require(p >= 0, s"p must be >= 0, got $p")
    require(p <= 1, s"p must be <= 1, got $p")
    // Maps p from [0,1] onto erfInv's domain [-1,1], then rescales to this distribution.
    mu + sigma * sqrt2 * erfInv(2 * p - 1)

  /** Computes the cumulative distribution function at `x`.
    *
    * @param x
    *   the point at which to evaluate the cdf
    * @return
    *   `0.5 * (1 + erf((x - mu) / (sqrt(2) * sigma)))`
    */
  def cdf(x: Double): Double = 0.5 * (1 + erf((x - mu) / (sqrt2 * sigma)))
6 changes: 2 additions & 4 deletions modules/insight/src/main/Question.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,14 @@ case class Question[X](

object Question:

// Gaussian used as the peer distribution. Its two parameters, 1878 and 357, are the rounded
// results of the $avg and $stdDevSamp aggregations over the `mr` field quoted below
// (each run on a 10,000,000-document $sample of db.insight).
// This hunk shows both the previous breeze-based construction and its replacement, which
// calls the two-argument Gaussian constructor directly.
private val peerDistribution = {
private val peerDistribution =
// > db.insight.estimatedDocumentCount()
// 277226337
// > db.insight.aggregate([{$sample:{size:10000000}},{$project:{_id:0,mr:1}},{$match:{mr:{$exists:1}}},{$group:{_id:null,ratings:{$avg:'$mr'}}}])
// { "_id" : null, "ratings" : 1878.1484907826184 }
// > db.insight.aggregate([{$sample:{size:10000000}},{$project:{_id:0,mr:1}},{$match:{mr:{$exists:1}}},{$group:{_id:null,ratings:{$stdDevSamp:'$mr'}}}])
// { "_id" : null, "ratings" : 357.42969844387625 }
import breeze.stats.distributions.*
Gaussian(1878d, 357d)(using Rand)
}
Gaussian(1878d, 357d)

case class Peers(rating: MeanRating):
lazy val ratingRange: Range =
Expand Down
10 changes: 5 additions & 5 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ object Dependencies {
val lettuce = "io.lettuce" % "lettuce-core" % "6.2.4.RELEASE"
val nettyTransport =
"io.netty" % s"netty-transport-native-$notifier" % "4.1.91.Final" classifier s"$os-$arch"
val munit = "org.scalameta" %% "munit" % "1.0.0-M7" % Test
val uaparser = "org.uaparser" %% "uap-scala" % "0.14.0"
val apacheText = "org.apache.commons" % "commons-text" % "1.10.0"
val bloomFilter = "com.github.alexandrnikitin" %% "bloom-filter" % "0.13.1_lila-1"
val breeze = "org.scalanlp" %% "breeze" % "2.1.0"
val munit = "org.scalameta" %% "munit" % "1.0.0-M7" % Test
val uaparser = "org.uaparser" %% "uap-scala" % "0.14.0"
val apacheText = "org.apache.commons" % "commons-text" % "1.10.0"
val apacheMath = "org.apache.commons" % "commons-math3" % "3.6.1"
val bloomFilter = "com.github.alexandrnikitin" %% "bloom-filter" % "0.13.1_lila-1"

object tests {
val bundle = Seq(munit)
Expand Down

0 comments on commit bd7cc02

Please sign in to comment.