diff --git a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.csproj b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.csproj index 334aa94c69..6f74b2057b 100644 --- a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.csproj +++ b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.csproj @@ -125,6 +125,8 @@ + + diff --git a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator.cs b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator.cs new file mode 100644 index 0000000000..20966b2122 --- /dev/null +++ b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator.cs @@ -0,0 +1,158 @@ +#region License Information +/* HeuristicLab + * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) + * + * This file is part of HeuristicLab. + * + * HeuristicLab is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HeuristicLab is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HeuristicLab. If not, see . 
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HEAL.Attic;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Data;
+using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
+using HeuristicLab.Parameters;
+using HeuristicLab.PluginInfrastructure;
+
+namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
+  /// <summary>
+  /// Single-objective evaluator for symbolic classification. The quality of a tree is the weighted sum of
+  /// its normalized mean squared error, its false negative rate and its false positive rate.
+  /// The quality is minimized (<see cref="Maximization"/> is <c>false</c>).
+  /// </summary>
+  [NonDiscoverableType]
+  [Item("Weighted Performance Measures Evaluator", "Calculates the quality of a symbolic classification solution based on three weighted measures(normalized mean squared error, false negative rate(1-sensitivity) and false positve rate(1-specificity)).")]
+  [StorableType("0772F316-5E12-4153-857E-8625069B4677")]
+  public class SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator : SymbolicClassificationSingleObjectiveEvaluator {
+    private const string NormalizedMeanSquaredErrorWeightingFactorParameterName = "NormalizedMeanSquaredErrorWeightingFactor";
+    private const string FalseNegativeRateWeightingFactorParameterName = "FalseNegativeRateWeightingFactor";
+    private const string FalsePositiveRateWeightingFactorParameterName = "FalsePositiveRateWeightingFactor";
+    private const string ModelCreatorParameterName = "ModelCreator";
+
+    /// <summary>Lower quality values are better.</summary>
+    public override bool Maximization { get { return false; } }
+
+    #region parameter properties
+    public IFixedValueParameter<DoubleValue> NormalizedMeanSquaredErrorWeightingFactorParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[NormalizedMeanSquaredErrorWeightingFactorParameterName]; }
+    }
+    public IFixedValueParameter<DoubleValue> FalseNegativeRateWeightingFactorParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[FalseNegativeRateWeightingFactorParameterName]; }
+    }
+    public IFixedValueParameter<DoubleValue> FalsePositiveRateWeightingFactorParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[FalsePositiveRateWeightingFactorParameterName]; }
+    }
+    public IValueLookupParameter<ISymbolicClassificationModelCreator> ModelCreatorParameter {
+      get { return (IValueLookupParameter<ISymbolicClassificationModelCreator>)Parameters[ModelCreatorParameterName]; }
+    }
+    #endregion
+
+    /// <summary>Weight applied to the normalized mean squared error term.</summary>
+    public double NormalizedMeanSquaredErrorWeightingFactor {
+      get { return NormalizedMeanSquaredErrorWeightingFactorParameter.Value.Value; }
+    }
+    /// <summary>Weight applied to the false negative rate term.</summary>
+    public double FalseNegativeRateWeightingFactor {
+      get { return FalseNegativeRateWeightingFactorParameter.Value.Value; }
+    }
+    /// <summary>Weight applied to the false positive rate term.</summary>
+    public double FalsePositiveRateWeightingFactor {
+      get { return FalsePositiveRateWeightingFactorParameter.Value.Value; }
+    }
+
+    [StorableConstructor]
+    protected SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator(StorableConstructorFlag _) : base(_) { }
+    protected SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator(SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator original, Cloner cloner)
+      : base(original, cloner) {
+    }
+    public override IDeepCloneable Clone(Cloner cloner) {
+      return new SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator(this, cloner);
+    }
+
+    /// <summary>
+    /// Creates the evaluator with all three weighting factors set to 1 and a model creator lookup
+    /// parameter that has no default value.
+    /// </summary>
+    public SymbolicClassificationSingleObjectiveWeightedPerformanceMeasuresEvaluator()
+      : base() {
+      Parameters.Add(new FixedValueParameter<DoubleValue>(NormalizedMeanSquaredErrorWeightingFactorParameterName, "The weighting factor of the normalized mean squared error.", new DoubleValue(1)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(FalseNegativeRateWeightingFactorParameterName, "The weighting factor of the false negative rate (1-sensitivity).", new DoubleValue(1)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(FalsePositiveRateWeightingFactorParameterName, "The weighting factor of the false positive rate (1-specificity).", new DoubleValue(1)));
+      Parameters.Add(new ValueLookupParameter<ISymbolicClassificationModelCreator>(ModelCreatorParameterName, "The model creator which is used during the evaluations."));
+    }
+
+    /// <summary>
+    /// Evaluates the tree found in the current scope on the generated rows and stores the result
+    /// in the quality parameter.
+    /// </summary>
+    public override IOperation InstrumentedApply() {
+      IEnumerable<int> rows = GenerateRowsToEvaluate();
+      var tree = SymbolicExpressionTreeParameter.ActualValue;
+      var creator = ModelCreatorParameter.ActualValue;
+      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
+      var estimationLimits = EstimationLimitsParameter.ActualValue;
+      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
+
+      double quality = Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper,
+        ProblemDataParameter.ActualValue, rows, applyLinearScaling, creator, NormalizedMeanSquaredErrorWeightingFactor, FalseNegativeRateWeightingFactor, FalsePositiveRateWeightingFactor);
+      QualityParameter.ActualValue = new DoubleValue(quality);
+      return base.InstrumentedApply();
+    }
+
+    /// <summary>
+    /// Calculates the weighted sum of normalized mean squared error, false negative rate and
+    /// false positive rate of <paramref name="tree"/> on the given rows.
+    /// </summary>
+    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
+    /// <param name="tree">The symbolic expression tree to evaluate.</param>
+    /// <param name="lowerEstimationLimit">Lower bound the estimated values are limited to.</param>
+    /// <param name="upperEstimationLimit">Upper bound the estimated values are limited to.</param>
+    /// <param name="problemData">Supplies the dataset, the target variable and the positive class.</param>
+    /// <param name="rows">Rows of the dataset to evaluate on.</param>
+    /// <param name="applyLinearScaling">Must be <c>false</c>; linear scaling is not supported.</param>
+    /// <param name="modelCreator">Creates the classification model used to derive class predictions.</param>
+    /// <param name="normalizedMeanSquaredErrorWeightingFactor">Weight of the normalized mean squared error.</param>
+    /// <param name="falseNegativeRateWeightingFactor">Weight of the false negative rate.</param>
+    /// <param name="falsePositiveRateWeightingFactor">Weight of the false positive rate.</param>
+    /// <returns>The weighted quality, or <see cref="Double.NaN"/> if one of the calculators reports an error.</returns>
+    /// <exception cref="NotSupportedException">Thrown if <paramref name="applyLinearScaling"/> is <c>true</c>.</exception>
+    /// <exception cref="ArgumentNullException">Thrown if <paramref name="modelCreator"/> is <c>null</c>.</exception>
+    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData,
+      IEnumerable<int> rows, bool applyLinearScaling, ISymbolicClassificationModelCreator modelCreator, double normalizedMeanSquaredErrorWeightingFactor, double falseNegativeRateWeightingFactor, double falsePositiveRateWeightingFactor) {
+      // reject the unsupported configuration before any work is done, so the outcome does not
+      // depend on whether an earlier calculation step happened to fail
+      if (applyLinearScaling) {
+        throw new NotSupportedException("The Weighted Performance Measures Evaluator does not suppport linear scaling!");
+      }
+      if (modelCreator == null) throw new ArgumentNullException("modelCreator");
+
+      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
+      var boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit).ToArray();
+      // materialized once because the target values are consumed by several calculators below
+      var targetClassValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
+
+      //calculate performance measures
+      string positiveClassName = problemData.PositiveClass;
+      IEnumerable<double> estimatedClassValues;
+
+      var model = modelCreator.CreateSymbolicClassificationModel(problemData.TargetVariable, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
+      var discriminantModel = model as ISymbolicDiscriminantFunctionClassificationModel;
+      if (discriminantModel != null) {
+        // reuse the already computed estimates for threshold calculation and classification
+        double[] classValues, thresholds;
+        discriminantModel.ThresholdCalculator.Calculate(problemData, boundedEstimatedValues, targetClassValues, out classValues, out thresholds);
+        discriminantModel.SetThresholdsAndClassValues(thresholds, classValues);
+        estimatedClassValues = discriminantModel.GetEstimatedClassValues(boundedEstimatedValues);
+      } else {
+        model.RecalculateModelParameters(problemData, rows);
+        estimatedClassValues = model.GetEstimatedClassValues(problemData.Dataset, rows);
+      }
+
+      var performanceCalculator = new ClassificationPerformanceMeasuresCalculator(positiveClassName, problemData.GetClassValue(positiveClassName));
+      performanceCalculator.Calculate(targetClassValues, estimatedClassValues);
+      if (performanceCalculator.ErrorState != OnlineCalculatorError.None)
+        return Double.NaN;
+      double falseNegativeRate = 1 - performanceCalculator.TruePositiveRate;
+      double falsePositiveRate = performanceCalculator.FalsePositiveRate;
+
+      OnlineCalculatorError errorState;
+      double nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetClassValues, boundedEstimatedValues, out errorState);
+      if (errorState != OnlineCalculatorError.None) return Double.NaN;
+      return normalizedMeanSquaredErrorWeightingFactor * nmse + falseNegativeRateWeightingFactor * falseNegativeRate + falsePositiveRateWeightingFactor * falsePositiveRate;
+    }
+
+    /// <summary>
+    /// Evaluates <paramref name="tree"/> on <paramref name="problemData"/> using the parameter values
+    /// resolved in <paramref name="context"/>.
+    /// </summary>
+    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IClassificationProblemData problemData, IEnumerable<int> rows) {
+      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
+      EstimationLimitsParameter.ExecutionContext = context;
+      ApplyLinearScalingParameter.ExecutionContext = context;
+      ModelCreatorParameter.ExecutionContext = context;
+
+      try {
+        return Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper,
+          problemData, rows, ApplyLinearScalingParameter.ActualValue.Value, ModelCreatorParameter.ActualValue, NormalizedMeanSquaredErrorWeightingFactor,
+          FalseNegativeRateWeightingFactor, FalsePositiveRateWeightingFactor);
+      } finally {
+        // always detach the context, also when the calculation throws
+        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
+        EstimationLimitsParameter.ExecutionContext = null;
+        ApplyLinearScalingParameter.ExecutionContext = null;
+        ModelCreatorParameter.ExecutionContext = null;
+      }
+    }
+  }
+}
diff --git a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator.cs b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator.cs
new file mode 100644
index 0000000000..3b0bfbf511
--- /dev/null
+++ b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator.cs
@@ -0,0 +1,141 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HEAL.Attic;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Data;
+using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
+using HeuristicLab.Parameters;
+using HeuristicLab.PluginInfrastructure;
+
+namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
+  /// <summary>
+  /// Single-objective evaluator for symbolic classification that computes a mean squared error in which
+  /// each squared residual is multiplied by one of three configurable weights.
+  /// The quality is minimized (<see cref="Maximization"/> is <c>false</c>).
+  /// </summary>
+  [NonDiscoverableType]
+  [Item("Weighted Residuals Mean Squared Error Evaluator", @"A modified mean squared error evaluator that enables the possibility to weight residuals differently.
+The first residual category belongs to estimated values which definitely belong to a specific class because the estimated value is located above the maximum or below the minimum of all the class values (DefiniteResidualsWeight).
+The second residual category represents residuals which belong to the positive class whereby the estimated value is located between the positive and a negative class (PositiveClassResidualsWeight).
+All other cases are represented by the third category (NegativeClassesResidualsWeight).
+The weight gets multiplied to the squared error. Note that the Evaluator acts like a normal MSE-Evaluator if all the weights are set to 1.")]
+  [StorableType("A3193296-1A0F-46E2-8F43-22E2ED9CFFC5")]
+  public sealed class SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator : SymbolicClassificationSingleObjectiveEvaluator {
+    private const string DefiniteResidualsWeightParameterName = "DefiniteResidualsWeight";
+    private const string PositiveClassResidualsWeightParameterName = "PositiveClassResidualsWeight";
+    private const string NegativeClassesResidualsWeightParameterName = "NegativeClassesResidualsWeight";
+    [StorableConstructor]
+    private SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator(StorableConstructorFlag _) : base(_) { }
+    private SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator(SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator original, Cloner cloner)
+      : base(original, cloner) {
+    }
+    public override IDeepCloneable Clone(Cloner cloner) {
+      return new SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator(this, cloner);
+    }
+
+    /// <summary>Creates the evaluator with all three residual weights set to 1.</summary>
+    public SymbolicClassificationSingleObjectiveWeightedResidualsMeanSquaredErrorEvaluator()
+      : base() {
+      Parameters.Add(new FixedValueParameter<DoubleValue>(DefiniteResidualsWeightParameterName, "Weight of residuals which definitely belong to a specific class because the estimated values is located above the maximum or below the minimum of all the class values.", new DoubleValue(1)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(PositiveClassResidualsWeightParameterName, "Weight of residuals which belong to the positive class whereby the estimated value is located between the positive and a negative class.", new DoubleValue(1)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(NegativeClassesResidualsWeightParameterName, "Weight of residuals which are not covered by the DefiniteResidualsWeight or the PositiveClassResidualsWeight.", new DoubleValue(1)));
+    }
+
+    #region parameter properties
+    public IFixedValueParameter<DoubleValue> DefiniteResidualsWeightParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[DefiniteResidualsWeightParameterName]; }
+    }
+    public IFixedValueParameter<DoubleValue> PositiveClassResidualsWeightParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[PositiveClassResidualsWeightParameterName]; }
+    }
+    public IFixedValueParameter<DoubleValue> NegativeClassesResidualsWeightParameter {
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[NegativeClassesResidualsWeightParameterName]; }
+    }
+    #endregion
+
+    #region properties
+    /// <summary>Lower quality values are better.</summary>
+    public override bool Maximization { get { return false; } }
+
+    public double DefiniteResidualsWeight {
+      get { return DefiniteResidualsWeightParameter.Value.Value; }
+    }
+    public double PositiveClassResidualsWeight {
+      get { return PositiveClassResidualsWeightParameter.Value.Value; }
+    }
+    public double NegativeClassesResidualsWeight {
+      get { return NegativeClassesResidualsWeightParameter.Value.Value; }
+    }
+    #endregion
+
+    /// <summary>
+    /// Evaluates the tree found in the current scope on the generated rows and stores the result
+    /// in the quality parameter.
+    /// </summary>
+    public override IOperation InstrumentedApply() {
+      IEnumerable<int> rows = GenerateRowsToEvaluate();
+      var solution = SymbolicExpressionTreeParameter.ActualValue;
+      double quality = Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, rows, ApplyLinearScalingParameter.ActualValue.Value,
+        DefiniteResidualsWeight, PositiveClassResidualsWeight, NegativeClassesResidualsWeight);
+      QualityParameter.ActualValue = new DoubleValue(quality);
+      return base.InstrumentedApply();
+    }
+
+    /// <summary>
+    /// Calculates the weighted residuals mean squared error of <paramref name="tree"/> on the given rows.
+    /// </summary>
+    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
+    /// <param name="tree">The symbolic expression tree to evaluate.</param>
+    /// <param name="lowerEstimationLimit">Lower bound for the estimated values.</param>
+    /// <param name="upperEstimationLimit">Upper bound for the estimated values.</param>
+    /// <param name="problemData">Supplies the dataset, the target variable, the class values and the positive class.</param>
+    /// <param name="rows">Rows of the dataset to evaluate on.</param>
+    /// <param name="applyLinearScaling">If <c>true</c> the calculation is delegated to <c>CalculateWithScaling</c>.</param>
+    /// <param name="definiteResidualsWeight">Weight for estimates above the largest or below the smallest class value.</param>
+    /// <param name="positiveClassResidualsWeight">Weight for the remaining residuals whose target is the positive class.</param>
+    /// <param name="negativeClassesResidualsWeight">Weight for all other residuals.</param>
+    /// <returns>The weighted error, or <see cref="Double.NaN"/> if the calculator reports an error.</returns>
+    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
+      double definiteResidualsWeight, double positiveClassResidualsWeight, double negativeClassesResidualsWeight) {
+      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
+      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
+      OnlineCalculatorError errorState;
+
+      double positiveClassValue = problemData.GetClassValue(problemData.PositiveClass);
+      //get class values min/max (both are 0 when there are no class values)
+      double classValuesMin = problemData.ClassValues.ElementAtOrDefault(0);
+      double classValuesMax = classValuesMin;
+      foreach (double classValue in problemData.ClassValues) {
+        if (classValuesMin > classValue) classValuesMin = classValue;
+        if (classValuesMax < classValue) classValuesMax = classValue;
+      }
+
+      double quality;
+      if (applyLinearScaling) {
+        var calculator = new OnlineWeightedClassificationMeanSquaredErrorCalculator(positiveClassValue, classValuesMax, classValuesMin,
+                                                                                    definiteResidualsWeight, positiveClassResidualsWeight, negativeClassesResidualsWeight);
+        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows);
+        errorState = calculator.ErrorState;
+        quality = calculator.WeightedResidualsMeanSquaredError;
+      } else {
+        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
+        quality = OnlineWeightedClassificationMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, positiveClassValue, classValuesMax,
+          classValuesMin, definiteResidualsWeight, positiveClassResidualsWeight, negativeClassesResidualsWeight, out errorState);
+      }
+      if (errorState != OnlineCalculatorError.None) return Double.NaN;
+      return quality;
+    }
+
+    /// <summary>
+    /// Evaluates <paramref name="tree"/> on <paramref name="problemData"/> using the parameter values
+    /// resolved in <paramref name="context"/>.
+    /// </summary>
+    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IClassificationProblemData problemData, IEnumerable<int> rows) {
+      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
+      EstimationLimitsParameter.ExecutionContext = context;
+      ApplyLinearScalingParameter.ExecutionContext = context;
+
+      try {
+        return Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows, ApplyLinearScalingParameter.ActualValue.Value, DefiniteResidualsWeight, PositiveClassResidualsWeight, NegativeClassesResidualsWeight);
+      } finally {
+        // always detach the context, also when the calculation throws
+        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
+        EstimationLimitsParameter.ExecutionContext = null;
+        ApplyLinearScalingParameter.ExecutionContext = null;
+      }
+    }
+  }
+}
diff --git a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicDiscriminantFunctionClassificationModel.cs b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicDiscriminantFunctionClassificationModel.cs index d46d5b1348..4bc5ab8369 100644 --- a/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicDiscriminantFunctionClassificationModel.cs +++ b/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicDiscriminantFunctionClassificationModel.cs @@ -111,8 +111,12 @@ public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable } public override IEnumerable GetEstimatedClassValues(IDataset dataset, IEnumerable rows) { + var estimatedValues = GetEstimatedValues(dataset, rows); + return GetEstimatedClassValues(estimatedValues); + } + public IEnumerable GetEstimatedClassValues(IEnumerable estimatedValues) { if (!Thresholds.Any() && !ClassValues.Any()) throw new ArgumentException("No thresholds and class values were set for the current symbolic classification model."); - foreach (var x in GetEstimatedValues(dataset, rows)) { + foreach (var x in estimatedValues) { int classIndex = 0; // find first threshold value which is larger than x => class index = threshold index + 1 for (int i = 0; i < thresholds.Length; i++) { @@ -123,7 +127,6 @@ public override IEnumerable GetEstimatedClassValues(IDataset dataset, IE } } - public override 
ISymbolicClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { return CreateDiscriminantClassificationSolution(problemData); } diff --git a/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj b/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj index 64da2d452d..3769b425d0 100644 --- a/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj +++ b/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj @@ -243,6 +243,7 @@ + diff --git a/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationModel.cs b/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationModel.cs index 4760dcedbe..eb4552f0a4 100644 --- a/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationModel.cs +++ b/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationModel.cs @@ -22,9 +22,9 @@ using System; using System.Collections.Generic; using System.Linq; +using HEAL.Attic; using HeuristicLab.Common; using HeuristicLab.Core; -using HEAL.Attic; namespace HeuristicLab.Problems.DataAnalysis { /// @@ -120,8 +120,13 @@ public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable } public override IEnumerable GetEstimatedClassValues(IDataset dataset, IEnumerable rows) { + var estimatedValues = GetEstimatedValues(dataset, rows); + return GetEstimatedClassValues(estimatedValues); + } + + public virtual IEnumerable GetEstimatedClassValues(IEnumerable estimatedValues) { if (!Thresholds.Any() && !ClassValues.Any()) throw new ArgumentException("No thresholds and class values were set for the current classification model."); - foreach (var x in GetEstimatedValues(dataset, rows)) { + foreach (var x in estimatedValues) { int classIndex = 0; // find first 
threshold value which is larger than x => class index = threshold index + 1 for (int i = 0; i < thresholds.Length; i++) { @@ -131,6 +136,7 @@ public override IEnumerable GetEstimatedClassValues(IDataset dataset, IE yield return classValues.ElementAt(classIndex - 1); } } + #region events public event EventHandler ThresholdsChanged; protected virtual void OnThresholdsChanged(EventArgs e) { diff --git a/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IDiscriminantFunctionClassificationModel.cs b/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IDiscriminantFunctionClassificationModel.cs index b9dfcf0f95..7d00497da6 100644 --- a/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IDiscriminantFunctionClassificationModel.cs +++ b/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IDiscriminantFunctionClassificationModel.cs @@ -33,6 +33,7 @@ public interface IDiscriminantFunctionClassificationModel : IClassificationModel // class values and thresholds can only be assigned simultanously void SetThresholdsAndClassValues(IEnumerable thresholds, IEnumerable classValues); IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows); + IEnumerable GetEstimatedClassValues(IEnumerable estimatedValues); event EventHandler ThresholdsChanged; diff --git a/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineWeightedClassificationMeanSquaredErrorCalculator.cs b/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineWeightedClassificationMeanSquaredErrorCalculator.cs new file mode 100644 index 0000000000..bdf87a3397 --- /dev/null +++ b/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineWeightedClassificationMeanSquaredErrorCalculator.cs @@ -0,0 +1,115 @@ +#region License Information +/* HeuristicLab + * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) + * + * This file is part of HeuristicLab. 
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Problems.DataAnalysis {
+  /// <summary>
+  /// Online calculator for a mean squared error in which every squared residual is multiplied by a weight
+  /// chosen from the estimated and the original value. The weighted sum is divided by the number
+  /// of added value pairs, not by the sum of the weights.
+  /// </summary>
+  public class OnlineWeightedClassificationMeanSquaredErrorCalculator : IOnlineCalculator {
+
+    private double sse;
+    private int n;
+    /// <summary>The weighted mean squared error of all pairs added so far, or 0.0 if none was added.</summary>
+    public double WeightedResidualsMeanSquaredError {
+      get {
+        return n > 0 ? sse / n : 0.0;
+      }
+    }
+
+    public double PositiveClassValue { get; private set; }
+    public double ClassValuesMax { get; private set; }
+    public double ClassValuesMin { get; private set; }
+    public double DefiniteResidualsWeight { get; private set; }
+    public double PositiveClassResidualsWeight { get; private set; }
+    public double NegativeClassesResidualsWeight { get; private set; }
+
+    /// <summary>Creates a calculator in its reset state.</summary>
+    /// <param name="positiveClassValue">Class value of the positive class.</param>
+    /// <param name="classValuesMax">Largest class value.</param>
+    /// <param name="classValuesMin">Smallest class value.</param>
+    /// <param name="definiteResidualsWeight">Weight for estimates above <paramref name="classValuesMax"/> or below <paramref name="classValuesMin"/>.</param>
+    /// <param name="positiveClassResidualsWeight">Weight for the remaining pairs whose original value is the positive class value.</param>
+    /// <param name="negativeClassesResidualsWeight">Weight for all other pairs.</param>
+    public OnlineWeightedClassificationMeanSquaredErrorCalculator(double positiveClassValue, double classValuesMax, double classValuesMin,
+                                                                  double definiteResidualsWeight, double positiveClassResidualsWeight, double negativeClassesResidualsWeight) {
+      PositiveClassValue = positiveClassValue;
+      ClassValuesMax = classValuesMax;
+      ClassValuesMin = classValuesMin;
+      DefiniteResidualsWeight = definiteResidualsWeight;
+      PositiveClassResidualsWeight = positiveClassResidualsWeight;
+      NegativeClassesResidualsWeight = negativeClassesResidualsWeight;
+      Reset();
+    }
+
+    #region IOnlineCalculator Members
+    private OnlineCalculatorError errorState;
+    public OnlineCalculatorError ErrorState {
+      get { return errorState; }
+    }
+    public double Value {
+      get { return WeightedResidualsMeanSquaredError; }
+    }
+    /// <summary>Clears the accumulated values; the error state becomes InsufficientElementsAdded.</summary>
+    public void Reset() {
+      n = 0;
+      sse = 0.0;
+      errorState = OnlineCalculatorError.InsufficientElementsAdded;
+    }
+
+    /// <summary>
+    /// Adds one pair of values. A NaN or infinite value sets the InvalidValueAdded flag; once that
+    /// flag is set no further pairs are accumulated.
+    /// </summary>
+    public void Add(double original, double estimated) {
+      if (double.IsNaN(estimated) || double.IsInfinity(estimated) ||
+          double.IsNaN(original) || double.IsInfinity(original) || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) {
+        errorState = errorState | OnlineCalculatorError.InvalidValueAdded;
+      } else {
+        double error = estimated - original;
+        double weight;
+        //apply weight
+        if (estimated > ClassValuesMax || estimated < ClassValuesMin) {
+          // estimate lies outside the range spanned by the class values
+          weight = DefiniteResidualsWeight;
+        } else if (original.IsAlmost(PositiveClassValue)) {
+          weight = PositiveClassResidualsWeight;
+        } else {
+          weight = NegativeClassesResidualsWeight;
+        }
+        sse += error * error * weight;
+        n++;
+        errorState = errorState & (~OnlineCalculatorError.InsufficientElementsAdded);        // n >= 1
+      }
+    }
+    #endregion
+
+    /// <summary>
+    /// Calculates the weighted residuals mean squared error of two value sequences.
+    /// </summary>
+    /// <param name="originalValues">The original (target) values.</param>
+    /// <param name="estimatedValues">The estimated values, in the same order as <paramref name="originalValues"/>.</param>
+    /// <param name="errorState">Error state of the calculator after processing; processing stops at the first error.</param>
+    /// <returns>The weighted residuals mean squared error of the processed pairs.</returns>
+    /// <exception cref="ArgumentException">Thrown if no error occurred and the two sequences differ in length.</exception>
+    public static double Calculate(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, double positiveClassValue, double classValuesMax, double classValuesMin,
+                                                double definiteResidualsWeight, double positiveClassResidualsWeight, double negativeClassesResidualsWeight, out OnlineCalculatorError errorState) {
+      OnlineWeightedClassificationMeanSquaredErrorCalculator calculator = new OnlineWeightedClassificationMeanSquaredErrorCalculator(positiveClassValue, classValuesMax, classValuesMin, definiteResidualsWeight, positiveClassResidualsWeight, negativeClassesResidualsWeight);
+      bool originalHasNext = false;
+      bool estimatedHasNext = false;
+
+      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
+      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
+        // always move forward both enumerators (do not use short-circuit evaluation!)
+        while ((originalHasNext = originalEnumerator.MoveNext()) & (estimatedHasNext = estimatedEnumerator.MoveNext())) {
+          calculator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
+          if (calculator.ErrorState != OnlineCalculatorError.None) break;
+        }
+      }
+
+      // Without an error the loop only ends when at least one sequence is exhausted. The results of the
+      // last MoveNext calls are used here because calling MoveNext again would skip the element that was
+      // already consumed by the loop condition and miss a length difference of exactly one.
+      if (calculator.ErrorState == OnlineCalculatorError.None && (originalHasNext || estimatedHasNext)) {
+        throw new ArgumentException("Number of elements in originalValues and estimatedValues enumerations doesn't match.");
+      }
+      errorState = calculator.ErrorState;
+      return calculator.WeightedResidualsMeanSquaredError;
+    }
+  }
+}