diff --git a/build.sbt b/build.sbt index 2eb52df..f09e14b 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ -name := "easyminer-discretization" +name := "EasyMiner-Discretization" -organization := "eu.easyminer" +organization := "com.github.KIZI" version := "1.1.0" diff --git a/src/main/java/eu/easyminer/discretization/Interval.java b/src/main/java/eu/easyminer/discretization/Interval.java index 3cc6f78..6c46e0b 100644 --- a/src/main/java/eu/easyminer/discretization/Interval.java +++ b/src/main/java/eu/easyminer/discretization/Interval.java @@ -17,4 +17,6 @@ public interface Interval { Boolean isRightBoundClosed(); + Boolean isInInterval(double value); + } diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/Discretization.scala b/src/main/scala/eu/easyminer/discretization/algorithm/Discretization.scala index 3b6b0d1..fa33694 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/Discretization.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/Discretization.scala @@ -11,7 +11,7 @@ trait Discretization[T] { implicit val n: Numeric[T] - def discretize(data: Traversable[T]): Traversable[impl.Interval] + def discretize(data: Traversable[T]): Array[impl.Interval] } @@ -19,7 +19,7 @@ object Discretization { object Exceptions { - class IllegalTypeOfIterable(expected: Class[_], given: Class[_]) extends Exception("Illegal type of input iterable. Expected: " + expected.getSimpleName + ", given: " + given.getSimpleName) + class IllegalTypeOfTraversable(expected: Class[_], given: Class[_]) extends Exception("Illegal type of input traversable. Expected: " + expected.getSimpleName + ", given: " + given.getSimpleName) object UnsupportedDiscretizationTask extends Exception("Unsupported discretization task.") diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/DiscretizationTaskValidator.scala b/src/main/scala/eu/easyminer/discretization/algorithm/DiscretizationTaskValidator.scala index b140417..a20613e 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/DiscretizationTaskValidator.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/DiscretizationTaskValidator.scala @@ -2,7 +2,7 @@ package eu.easyminer.discretization.algorithm import eu.easyminer.discretization.DiscretizationTask import eu.easyminer.discretization.algorithm.DiscretizationTaskValidator.Exceptions.InvalidDiscretizationTask -import eu.easyminer.discretization.impl.{AbsoluteSupport, RelativeSupport, Support} +import eu.easyminer.discretization.impl.Support import eu.easyminer.discretization.task.{EquidistanceDiscretizationTask, EquifrequencyDiscretizationTask, EquisizeDiscretizationTask} /** @@ -33,8 +33,8 @@ object DiscretizationTaskValidator { implicit val equifrequencyDiscretizationTaskValidator: DiscretizationTaskValidator[EquifrequencyDiscretizationTask] = (dt: EquifrequencyDiscretizationTask) => throwIfFalse("Number of bins must be greater than zero.")(dt.getNumberOfBins > 0) implicit val equisizeDiscretizationTaskValidator: DiscretizationTaskValidator[EquisizeDiscretizationTask] = (dt: EquisizeDiscretizationTask) => (dt.getMinSupport: Support) match { - case AbsoluteSupport(s) => throwIfFalse("Absolute support must be greater than 1.")(s > 1) - case RelativeSupport(s) => throwIfFalse("Relative support must be greater than zero and lower than 1")(s > 0 && s < 1) + case Support.Absolute(s) => throwIfFalse("Absolute support must be greater than 1.")(s > 1) + case Support.Relative(s) => throwIfFalse("Relative support must be greater than zero and lower than 1")(s > 0 && s < 1) } } \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/EquidistantIntervals.scala b/src/main/scala/eu/easyminer/discretization/algorithm/EquidistantIntervals.scala index 2558c8f..df6e080 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/EquidistantIntervals.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/EquidistantIntervals.scala @@ -8,22 +8,18 @@ import eu.easyminer.discretization.impl.{Interval, IntervalBound} */ class EquidistantIntervals[T] private[algorithm](bins: Int)(implicit val n: Numeric[T]) extends Discretization[T] { - def discretize(data: Traversable[T]): Traversable[impl.Interval] = new Traversable[impl.Interval] { - def foreach[U](f: Interval => U): Unit = { - data.view - .map(x => (x, x)) - .reduceOption((x, y) => n.min(x._1, y._1) -> n.max(x._2, y._2)) - .map(x => n.toDouble(x._1) -> n.toDouble(x._2)) - .toIterator - .flatMap { case (min, max) => - val intervalSize = (max - min) / bins - for (binNumber <- 0 until bins) yield { - val leftBound = IntervalBound.Inclusive(min + intervalSize * binNumber) - val rightBound = if (binNumber + 1 == bins) IntervalBound.Inclusive(max) else IntervalBound.Exclusive(leftBound.value + intervalSize) - Interval(leftBound, rightBound) - } - }.foreach(f) - } - } + def discretize(data: Traversable[T]): Array[impl.Interval] = data.view + .map(x => (x, x)) + .reduceOption((x, y) => n.min(x._1, y._1) -> n.max(x._2, y._2)) + .map(x => n.toDouble(x._1) -> n.toDouble(x._2)) + .toIterator + .flatMap { case (min, max) => + val intervalSize = (max - min) / bins + for (binNumber <- 0 until bins) yield { + val leftBound = IntervalBound.Inclusive(min + intervalSize * binNumber) + val rightBound = if (binNumber + 1 == bins) IntervalBound.Inclusive(max) else IntervalBound.Exclusive(leftBound.value + intervalSize) + Interval(leftBound, rightBound) + } + }.toArray } \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/EquifrequentIntervals.scala b/src/main/scala/eu/easyminer/discretization/algorithm/EquifrequentIntervals.scala index 3b5e880..c70b24f 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/EquifrequentIntervals.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/EquifrequentIntervals.scala @@ -1,30 +1,30 @@ package eu.easyminer.discretization.algorithm import eu.easyminer.discretization.algorithm.CutpointsResolver._ -import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfIterable +import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfTraversable import eu.easyminer.discretization.algorithm.IntervalSmoothing._ import eu.easyminer.discretization.impl.sorting.SortedTraversable -import eu.easyminer.discretization.impl.{InclusiveIntervalBound, Interval, IntervalFrequency, ValueFrequency} +import eu.easyminer.discretization.impl._ /** * Created by propan on 18. 3. 2017. */ class EquifrequentIntervals[T] private[algorithm](bins: Int)(implicit val n: Numeric[T]) extends Discretization[T] { - private def countOptimalFrequency(data: Iterable[T]) = { - val dataCount = data.iterator.size + private def countOptimalFrequency(data: Traversable[T]) = { + val dataCount = data.size math.ceil(dataCount / bins).toInt } - private def searchIntervals(data: Iterable[ValueFrequency[T]], optimalFrequency: Int) = { + private def searchIntervals(data: Traversable[ValueFrequency[T]], optimalFrequency: Int) = { val intervals = new collection.mutable.ArrayBuffer[IntervalFrequency](bins) - for (value <- data.iterator) { + for (value <- data) { intervals .lastOption .filter(interval => intervals.length == bins || math.abs(optimalFrequency - (interval.frequency + value.frequency)) < math.abs(optimalFrequency - interval.frequency)) match { - case Some(interval) => intervals.update(intervals.length - 1, IntervalFrequency(interval.interval.copy(maxValue = InclusiveIntervalBound(n.toDouble(value.value))), interval.frequency + value.frequency)) + case Some(interval) => intervals.update(intervals.length - 1, IntervalFrequency(interval.interval.copy(maxValue = IntervalBound.Inclusive(n.toDouble(value.value))), interval.frequency + value.frequency)) case None => - val leftRightBound = InclusiveIntervalBound(n.toDouble(value.value)) + val leftRightBound = IntervalBound.Inclusive(n.toDouble(value.value)) intervals += IntervalFrequency(Interval(leftRightBound, leftRightBound), value.frequency) } } @@ -60,14 +60,14 @@ class EquifrequentIntervals[T] private[algorithm](bins: Int)(implicit val n: Num } - def discretize(data: Traversable[T]): Traversable[Interval] = data match { + def discretize(data: Traversable[T]): Array[Interval] = data match { case data: SortedTraversable[T] => val optimalFrequency = countOptimalFrequency(data) val intervals = searchIntervals(data, optimalFrequency) smoothIntervals(intervals, data, 1000000)(canItMoveLeft(optimalFrequency))(canItMoveRight(optimalFrequency)) resolveCutpoints(intervals) - intervals.iterator.map(_.interval).toList - case _ => throw new IllegalTypeOfIterable(classOf[SortedTraversable[T]], data.getClass) + intervals.iterator.map(_.interval).toArray + case _ => throw new IllegalTypeOfTraversable(classOf[SortedTraversable[T]], data.getClass) } } \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/EquisizedIntervals.scala b/src/main/scala/eu/easyminer/discretization/algorithm/EquisizedIntervals.scala index 2cf5683..a1ba3b0 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/EquisizedIntervals.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/EquisizedIntervals.scala @@ -1,7 +1,7 @@ package eu.easyminer.discretization.algorithm import eu.easyminer.discretization.algorithm.CutpointsResolver._ -import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfIterable +import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfTraversable import eu.easyminer.discretization.algorithm.IntervalSmoothing._ import eu.easyminer.discretization.impl._ import eu.easyminer.discretization.impl.sorting.SortedTraversable @@ -11,20 +11,20 @@ import eu.easyminer.discretization.impl.sorting.SortedTraversable */ class EquisizedIntervals[T] private[algorithm](minSupport: Support)(implicit val n: Numeric[T]) extends Discretization[T] { - private def countOptimalFrequency(data: Iterable[T]) = minSupport match { - case RelativeSupport(minSupport) => math.ceil(data.iterator.size * minSupport).toInt - case AbsoluteSupport(minSupport) => minSupport + private def countOptimalFrequency(data: Traversable[T]) = minSupport match { + case Support.Relative(minSupport) => math.ceil(data.size * minSupport).toInt + case Support.Absolute(minSupport) => minSupport } - private def searchIntervals(data: Iterable[ValueFrequency[T]], optimalFrequency: Int) = { + private def searchIntervals(data: Traversable[ValueFrequency[T]], optimalFrequency: Int) = { val intervals = new collection.mutable.ArrayBuffer[IntervalFrequency]() - for (value <- data.iterator) { + for (value <- data) { intervals .lastOption .filter(interval => interval.frequency < optimalFrequency) match { - case Some(interval) => intervals.update(intervals.length - 1, IntervalFrequency(interval.interval.copy(maxValue = InclusiveIntervalBound(n.toDouble(value.value))), interval.frequency + value.frequency)) + case Some(interval) => intervals.update(intervals.length - 1, IntervalFrequency(interval.interval.copy(maxValue = IntervalBound.Inclusive(n.toDouble(value.value))), interval.frequency + value.frequency)) case None => - val leftRightBound = InclusiveIntervalBound(n.toDouble(value.value)) + val leftRightBound = IntervalBound.Inclusive(n.toDouble(value.value)) intervals += IntervalFrequency(Interval(leftRightBound, leftRightBound), value.frequency) } } @@ -52,14 +52,14 @@ class EquisizedIntervals[T] private[algorithm](minSupport: Support)(implicit val decreasedIntervalFreqency >= optimalFrequency && nextDifference < currentDifference } - def discretize(data: Iterable[T]): Seq[Interval] = data match { + def discretize(data: Traversable[T]): Array[Interval] = data match { case data: SortedTraversable[T] => val optimalFrequency = countOptimalFrequency(data) val intervals = searchIntervals(data, optimalFrequency) smoothIntervals(intervals, data, 1000000)(canItMoveLeft(optimalFrequency))(canItMoveRight(optimalFrequency)) resolveCutpoints(intervals) - intervals.iterator.map(_.interval).toList - case _ => throw new IllegalTypeOfIterable(classOf[SortedTraversable[T]], data.getClass) + intervals.iterator.map(_.interval).toArray + case _ => throw new IllegalTypeOfTraversable(classOf[SortedTraversable[T]], data.getClass) } } \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/algorithm/IntervalSmoothing.scala b/src/main/scala/eu/easyminer/discretization/algorithm/IntervalSmoothing.scala index c8906e9..18030bd 100644 --- a/src/main/scala/eu/easyminer/discretization/algorithm/IntervalSmoothing.scala +++ b/src/main/scala/eu/easyminer/discretization/algorithm/IntervalSmoothing.scala @@ -3,7 +3,7 @@ package eu.easyminer.discretization.algorithm import java.util import eu.easyminer.discretization.impl.sorting.SortedTraversable -import eu.easyminer.discretization.impl.{InclusiveIntervalBound, IntervalFrequency, ValueFrequency} +import eu.easyminer.discretization.impl.{IntervalBound, IntervalFrequency, ValueFrequency} import eu.easyminer.discretization.util.NumericByteArray._ /** @@ -17,7 +17,7 @@ trait IntervalSmoothing { (implicit n: Numeric[T]): Unit = { if (bufferSize < 32) throw new IllegalArgumentException("Buffer size for smoothing must be greater than 31 bytes.") //input data are converted into ValueFrequency - it is aggregated distinct values with their count - val groupedData: Iterable[ValueFrequency[T]] = records + val groupedData: Traversable[ValueFrequency[T]] = records //values buffer for faster smoothing iteration val buffer = new util.LinkedList[ValueFrequency[T]]() //miximal number of values in the buffer @@ -25,7 +25,7 @@ trait IntervalSmoothing { //smooth until there are no interval changes val iterates = Iterator.continually { //within each smoothing iteration all sorted data are iterated - groupedData.iterator.foldLeft(0, false) { case ((pointer, isChanged), currentValue) => + groupedData.foldLeft(0, false) { case ((pointer, isChanged), currentValue) => if (pointer < intervals.length - 1) { //we have two intervals to compare val leftInterval = intervals(pointer) @@ -42,11 +42,11 @@ trait IntervalSmoothing { pointer + 1 } //this method moves right interval border into the left interval - def moveToLeft() = { + def moveToLeft(): Unit = { //new left interval has right border as prevValue = add prev value into the left interval intervals.update(pointer, IntervalFrequency(leftInterval.interval.copy(maxValue = rightInterval.interval.minValue), leftInterval.frequency + prevValue.get.frequency)) //new right interval has left border as currentValue = remove prev value from the right interval - intervals.update(pointer + 1, IntervalFrequency(rightInterval.interval.copy(minValue = InclusiveIntervalBound(n.toDouble(currentValue.value))), rightInterval.frequency - prevValue.get.frequency)) + intervals.update(pointer + 1, IntervalFrequency(rightInterval.interval.copy(minValue = IntervalBound.Inclusive(n.toDouble(currentValue.value))), rightInterval.frequency - prevValue.get.frequency)) } //this method moves left interval borders into the right interval //it moves border from all items in the buffer until condition @@ -59,7 +59,7 @@ trait IntervalSmoothing { val currentValue = buffer.pollFirst() val prevValue = buffer.getFirst //new left interval has right border as prevValue = delete current from the left interval - val newLeftInterval = IntervalFrequency(leftInterval.interval.copy(maxValue = InclusiveIntervalBound(n.toDouble(prevValue.value))), leftInterval.frequency - currentValue.frequency) + val newLeftInterval = IntervalFrequency(leftInterval.interval.copy(maxValue = IntervalBound.Inclusive(n.toDouble(prevValue.value))), leftInterval.frequency - currentValue.frequency) //new right interval has left border as currentValue = add current into the right interval val newRightInterval = IntervalFrequency(rightInterval.interval.copy(minValue = leftInterval.interval.maxValue), rightInterval.frequency + currentValue.frequency) //do it again diff --git a/src/main/scala/eu/easyminer/discretization/impl/DefaultDiscretization.scala b/src/main/scala/eu/easyminer/discretization/impl/DefaultDiscretization.scala index 512d769..1252eeb 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/DefaultDiscretization.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/DefaultDiscretization.scala @@ -5,7 +5,7 @@ import java.io.File import eu.easyminer.discretization import eu.easyminer.discretization.algorithm.{Discretization, EquidistantIntervals, EquifrequentIntervals, EquisizedIntervals} import eu.easyminer.discretization.impl.IterableConversions._ -import eu.easyminer.discretization.impl.sorting.{ReversableSortedTraversable, SortedInMemoryNumericTraversable, SortedPersistentNumericTraversable} +import eu.easyminer.discretization.impl.sorting.{SortedInMemoryNumericTraversable, SortedPersistentNumericTraversable} import eu.easyminer.discretization.{Discretizable, DiscretizationTask} import scala.language.implicitConversions @@ -26,18 +26,18 @@ object DefaultDiscretization extends Discretizable { implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B] val dt = Discretization(discretizationTask) dt match { - case dt: EquidistantIntervals[B] => dt.discretize(data.asScala) + case dt: EquidistantIntervals[B] => dt.discretize(data.asScala).toArray case _: EquifrequentIntervals[B] | _: EquisizedIntervals[B] => data match { case data: discretization.SortedIterable[A] with discretization.PersistentIterable[A] => - SortedPersistentNumericTraversable[B, Seq[Interval]](data, file)(dt.discretize) + SortedPersistentNumericTraversable[B, Traversable[Interval]](data, file)(dt.discretize).toArray case data: discretization.InMemoryIterable[A] => - dt.discretize(SortedInMemoryNumericTraversable(data.iterator(), discretizationTask.getBufferSize)) + dt.discretize(SortedInMemoryNumericTraversable(data.asScala, discretizationTask.getBufferSize)).toArray case data: discretization.ReversableSortedIterable[A] => - dt.discretize(data: ReversableSortedTraversable[B]) + dt.discretize(data.asScala).toArray case data: discretization.SortedIterable[A] => - SortedPersistentNumericTraversable[B, Seq[Interval]](data, file)(dt.discretize) + dt.discretize(data.asScala).toArray case _ => - SortedPersistentNumericTraversable[B, Seq[Interval]](data.iterator(), directory, discretizationTask.getBufferSize)(dt.discretize) + SortedPersistentNumericTraversable[B, Traversable[Interval]](data.asScala, directory, discretizationTask.getBufferSize)(dt.discretize).toArray } case _ => Array() } diff --git a/src/main/scala/eu/easyminer/discretization/impl/Interval.scala b/src/main/scala/eu/easyminer/discretization/impl/Interval.scala index 37b53c2..4db2d50 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/Interval.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/Interval.scala @@ -1,30 +1,34 @@ package eu.easyminer.discretization.impl -import eu.easyminer.discretization +import java.lang -import scala.language.implicitConversions +import eu.easyminer.discretization /** * Created by propan on 16. 3. 2017. */ -case class Interval(minValue: IntervalBound, maxValue: IntervalBound) - -object Interval { +case class Interval(minValue: IntervalBound, maxValue: IntervalBound) extends discretization.Interval { + def getLeftBoundValue: lang.Double = minValue.value - implicit def intervalToJavaInterval(interval: Interval): discretization.Interval = new discretization.Interval { - def getLeftBoundValue: java.lang.Double = interval.minValue.value + def getRightBoundValue: lang.Double = maxValue.value - def getRightBoundValue: java.lang.Double = interval.maxValue.value + def isLeftBoundOpened: lang.Boolean = minValue.isInstanceOf[IntervalBound.Exclusive] - def isLeftBoundClosed: java.lang.Boolean = interval.minValue.isInstanceOf[IntervalBound.Inclusive] + def isRightBoundOpened: lang.Boolean = maxValue.isInstanceOf[IntervalBound.Exclusive] - def isRightBoundClosed: java.lang.Boolean = interval.maxValue.isInstanceOf[IntervalBound.Inclusive] + def isLeftBoundClosed: lang.Boolean = !isLeftBoundOpened - def isLeftBoundOpened: java.lang.Boolean = !isLeftBoundClosed + def isRightBoundClosed: lang.Boolean = !isRightBoundOpened - def isRightBoundOpened: java.lang.Boolean = !isRightBoundClosed + def isInInterval(value: Double): lang.Boolean = { + val isGtMinValue = minValue match { + case IntervalBound.Inclusive(x) => value >= x + case IntervalBound.Exclusive(x) => value > x + } + val isLtMaxValue = maxValue match { + case IntervalBound.Inclusive(x) => value <= x + case IntervalBound.Exclusive(x) => value < x + } + isGtMinValue && isLtMaxValue } - - implicit def seqIntervalsToArrayJavaIntervals(intervals: Seq[Interval]): Array[discretization.Interval] = intervals.iterator.map(x => x: discretization.Interval).toArray - } \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/impl/IterableConversions.scala b/src/main/scala/eu/easyminer/discretization/impl/IterableConversions.scala index 2885141..265e2bc 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/IterableConversions.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/IterableConversions.scala @@ -17,13 +17,13 @@ trait IterableConversions { implicit class PimpedJavaIterable[A <: Number](it: java.lang.Iterable[A]) { - def asScala[B](implicit n: Numeric[B], numberToScalaNumber: A => B): Iterable[B] = { + def asScala[B](implicit n: Numeric[B], numberToScalaNumber: A => B): Traversable[B] = { implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B] it match { case it: ReversableSortedIterable[A] => it: sorting.ReversableSortedTraversable[B] case it: SortedIterable[A] => it: sorting.SortedTraversable[B] - case _ => new Iterable[B] { - def iterator: Iterator[B] = it.iterator() + case _ => new Traversable[B] { + def foreach[U](f: B => U): Unit = it.iterator().foreach(f) } } } diff --git a/src/main/scala/eu/easyminer/discretization/impl/PersistentNumericTraversable.scala b/src/main/scala/eu/easyminer/discretization/impl/PersistentNumericTraversable.scala index 7b8aa5e..aa6be93 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/PersistentNumericTraversable.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/PersistentNumericTraversable.scala @@ -9,7 +9,7 @@ import eu.easyminer.discretization.util.PersistentTraversableOps._ * Created by propan on 17. 3. 2017. */ class PersistentNumericTraversable[T] private(col: Traversable[T], file: File)(implicit n: Numeric[T]) extends Traversable[T] { - //implicit private val b2n: Array[Byte] => T = byteArrayToNumber[T] + implicit private val b2n: Array[Byte] => T = byteArrayToNumber[T] def foreach[U](f: T => U): Unit = if (file.exists()) inputStreamTraversable[T](new FileInputStream(file)).foreach(f) else outputStreamTraversable(col, new FileOutputStream(file)) } diff --git a/src/main/scala/eu/easyminer/discretization/impl/Support.scala b/src/main/scala/eu/easyminer/discretization/impl/Support.scala index 4fc5839..2e90667 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/Support.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/Support.scala @@ -11,13 +11,13 @@ sealed trait Support object Support { - implicit def javaSupportToSupport(support: discretization.Support): Support = support match { - case s: discretization.RelativeSupport => RelativeSupport(s.getSupport) - case s: discretization.AbsoluteSupport => AbsoluteSupport(s.getSupport) - } + case class Relative(support: Double) extends Support -} + case class Absolute(support: Int) extends Support -case class RelativeSupport(support: Double) extends Support + implicit def javaSupportToSupport(support: discretization.Support): Support = support match { + case s: discretization.RelativeSupport => Relative(s.getSupport) + case s: discretization.AbsoluteSupport => Absolute(s.getSupport) + } -case class AbsoluteSupport(support: Int) extends Support \ No newline at end of file +} \ No newline at end of file diff --git a/src/main/scala/eu/easyminer/discretization/impl/ValueFrequency.scala b/src/main/scala/eu/easyminer/discretization/impl/ValueFrequency.scala index d4cc828..e04f42d 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/ValueFrequency.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/ValueFrequency.scala @@ -11,28 +11,18 @@ case class ValueFrequency[T](value: T, frequency: Int) object ValueFrequency { - implicit def sortedIterableToValueFrequencyIterable[T](it: SortedTraversable[T])(implicit n: Numeric[T]): Iterable[ValueFrequency[T]] = new Iterable[ValueFrequency[T]] { - def iterator: Iterator[ValueFrequency[T]] = new Iterator[ValueFrequency[T]] { - val _it = it.iterator - var lastValue = Option.empty[T] - - @scala.annotation.tailrec - def loadValueFrequency(v: ValueFrequency[T]): ValueFrequency[T] = if (_it.hasNext) { - val x = _it.next() - if (n.equiv(x, v.value)) { - loadValueFrequency(v.copy(frequency = v.frequency + 1)) - } else { - lastValue = Some(x) - v + implicit def sortedTraversableToValueFrequencyTraversable[T](it: SortedTraversable[T])(implicit n: Numeric[T]): Traversable[ValueFrequency[T]] = new Traversable[ValueFrequency[T]] { + def foreach[U](f: ValueFrequency[T] => U): Unit = { + var lastValue: Option[ValueFrequency[T]] = None + for (value <- it) { + lastValue match { + case Some(x) if n.equiv(x.value, value) => lastValue = Some(x.copy(frequency = x.frequency + 1)) + case _ => + lastValue.foreach(f) + lastValue = Some(ValueFrequency(value, 1)) } - } else { - lastValue = None - v } - - def hasNext: Boolean = _it.hasNext || lastValue.nonEmpty - - def next(): ValueFrequency[T] = loadValueFrequency(ValueFrequency(lastValue.getOrElse(_it.next()), 1)) + lastValue.foreach(f) } } diff --git a/src/main/scala/eu/easyminer/discretization/impl/sorting/SortedPersistentNumericTraversable.scala b/src/main/scala/eu/easyminer/discretization/impl/sorting/SortedPersistentNumericTraversable.scala index 7af01f4..fac36f2 100644 --- a/src/main/scala/eu/easyminer/discretization/impl/sorting/SortedPersistentNumericTraversable.scala +++ b/src/main/scala/eu/easyminer/discretization/impl/sorting/SortedPersistentNumericTraversable.scala @@ -13,6 +13,7 @@ import eu.easyminer.discretization.util.ReversedFileInputStream object SortedPersistentNumericTraversable { def apply[A, B](col: Traversable[A], directory: File, bufferSize: Int)(f: ReversableSortedTraversable[A] => B)(implicit n: Numeric[A]): B = { + implicit val b2n: Array[Byte] => A = byteArrayToNumber[A] val ems = new ExternalMergeSort(bufferSize) lazy val sortedFile = ems.sort(col, directory) try { @@ -31,6 +32,7 @@ object SortedPersistentNumericTraversable { } def apply[A, B](col: SortedTraversable[A], file: File)(f: ReversableSortedTraversable[A] => B)(implicit n: Numeric[A]): B = PersistentNumericTraversable(col, file) { col => + implicit val b2n: Array[Byte] => A = byteArrayToNumber[A] val rst = new ReversableSortedTraversable[A]( col, new Traversable[A] {