Skip to content

Commit

Permalink
switch housing dataset to wine (#170)
Browse files Browse the repository at this point in the history
* replace housing uci dataset to wine quality
  • Loading branch information
Ivanidzo4ka authored May 23, 2018
1 parent 76393f4 commit d51321c
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,5 @@ ASALocalRun/

# MSBuild Binary and Structured Log
*.binlog
# Ignore external test datasets.
/test/data/external/
18 changes: 16 additions & 2 deletions build.proj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Directory.Build.props))\Directory.Build.props" />

<Import Project="$(ToolsDir)VersionTools.targets" Condition="Exists('$(ToolsDir)VersionTools.targets')" />

<UsingTask TaskName="DownloadFilesFromUrl" AssemblyFile="$(ToolsDir)Microsoft.DotNet.Build.Tasks.dll"/>
<PropertyGroup>
<!-- To disable the restoration of packages, set RestoreDuringBuild=false or pass /p:RestoreDuringBuild=false.-->
<RestoreDuringBuild Condition="'$(RestoreDuringBuild)'==''">true</RestoreDuringBuild>
Expand All @@ -33,6 +33,7 @@
RestoreProjects;
BuildNative;
$(TraversalBuildDependsOn);
DownloadExternalTestFiles;
RunTests;
</TraversalBuildDependsOn>
</PropertyGroup>
Expand All @@ -56,13 +57,26 @@
<ItemGroup>
<PkgProject Include="pkg\**\*.nupkgproj" />
</ItemGroup>

<MSBuild Projects="@(PkgProject)"
Targets="Restore" />
<MSBuild Projects="@(PkgProject)"
Targets="Pack" />
</Target>

<!-- External dataset that DownloadExternalTestFiles fetches ahead of RunTests.
     Url and DestinationFile are item metadata handed to the DownloadFilesFromUrl task through @(TestFile).
     $(MSBuildThisFileDirectory) already ends with a directory separator, so nothing is inserted after it;
     this keeps the item identity identical to DestinationFile, which the target uses as its output path. -->
<ItemGroup>
  <TestFile Include="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv"
            Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
            DestinationFile="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv" />
</ItemGroup>

<!-- Downloads every TestFile item through the DownloadFilesFromUrl task.
     Inputs/Outputs are declared so MSBuild can skip the work when the destination file is already up to date;
     referencing %(TestFile.DestinationFile) batches the target per distinct destination file.
     NOTE(review): TreatErrorsAsWarnings="true" presumably lets the build continue when a download fails,
     in which case tests reading the file would fail later instead. Confirm against the task implementation. -->
<Target Name="DownloadExternalTestFiles" Inputs="@(TestFile)" Outputs="%(TestFile.DestinationFile)">
<Message Importance="High" Text="Downloading external test files... %(TestFile.DestinationFile)" />
<DownloadFilesFromUrl Items="@(TestFile)"
DestinationDir="test/data/external"
TreatErrorsAsWarnings="true"/>
</Target>

<Target Name="RunTests" Condition="'$(RunTests)'=='true'">
<MSBuild Projects="test\run-tests.proj"
Targets="RunTests" />
Expand Down
32 changes: 28 additions & 4 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using ML = Microsoft.ML;
using Microsoft.ML.Runtime;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -269,10 +270,10 @@ public void TestCrossValidationBinaryMacro()
}
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact]
public void TestCrossValidationMacro()
{
var dataPath = GetDataPath(@"housing.txt");
var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
using (var env = new TlcEnvironment())
{
var subGraph = env.CreateExperiment();
Expand All @@ -295,7 +296,30 @@ public void TestCrossValidationMacro()
var modelCombineOutput = subGraph.Add(modelCombine);

var experiment = env.CreateExperiment();
var importInput = new ML.Data.TextLoader(dataPath);
var importInput = new ML.Data.TextLoader(dataPath)
{
Arguments = new TextLoaderArguments
{
Separator = new[] { ';' },
HasHeader = true,
Column = new[]
{
new TextLoaderColumn()
{
Name = "Label",
Source = new [] { new TextLoaderRange(11) },
Type = DataKind.Num
},

new TextLoaderColumn()
{
Name = "Features",
Source = new [] { new TextLoaderRange(0,10) },
Type = DataKind.Num
}
}
}
};
var importOutput = experiment.Add(importInput);

var crossValidate = new ML.Models.CrossValidator
Expand Down Expand Up @@ -324,7 +348,7 @@ public void TestCrossValidationMacro()
Assert.True(b);
double val = 0;
getter(ref val);
Assert.Equal(3.32, val, 1);
Assert.Equal(0.58, val, 1);
b = cursor.MoveNext();
Assert.False(b);
}
Expand Down
23 changes: 13 additions & 10 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ public void EntryPointTextToKeyToText()
}

private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath,
string instanceMetricsPath, string confusionMatrixPath = null)
string instanceMetricsPath, string confusionMatrixPath = null, string loader = null)
{
string inputGraph = string.Format(@"
{{
Expand All @@ -919,6 +919,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
'Name': 'Data.CustomTextLoader',
'Inputs': {{
'InputFile': '$file'
{8}
}},
'Outputs': {{
'Data': '$AllData'
Expand Down Expand Up @@ -978,7 +979,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
}}
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath),
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "",
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "");
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "",
string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader));

var jsonPath = DeleteOutputPath("graph.json");
File.WriteAllLines(jsonPath, new[] { inputGraph });
Expand Down Expand Up @@ -1036,15 +1038,16 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact]
public void EntryPointEvaluateRegression()
{
var dataPath = GetDataPath("housing.txt");
var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
var warningsPath = DeleteOutputPath("warnings.idv");
var overallMetricsPath = DeleteOutputPath("overall.idv");
var instanceMetricsPath = DeleteOutputPath("instance.idv");

RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath);
RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator",
dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: TestDatasets.winequality.loaderSettings);

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath))
Assert.Equal(0, CountRows(loader));
Expand All @@ -1053,7 +1056,7 @@ public void EntryPointEvaluateRegression()
Assert.Equal(1, CountRows(loader));

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath))
Assert.Equal(104, CountRows(loader));
Assert.Equal(975, CountRows(loader));
}

[Fact]
Expand All @@ -1068,10 +1071,10 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointSDCARegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.StochasticDualCoordinateAscentRegressor", loader: TestDatasets.winequality.loaderSettings);
}

[Fact]
Expand Down Expand Up @@ -1142,10 +1145,10 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointPoissonRegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.PoissonRegressor", loader: TestDatasets.winequality.loaderSettings);
}

[Fact]
Expand Down
8 changes: 8 additions & 0 deletions test/Microsoft.ML.TestFramework/Datasets.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ public static class TestDatasets
testFilename = "housing.txt"
};

// White wine quality dataset. The train and test entries point at the same CSV, which lives in the
// "external" subfolder of the test data directory (build.proj downloads it to test/data/external).
public static TestDataset winequality = new TestDataset
{
name = "wine",
trainFilename = "external/winequality-white.csv",
testFilename = "external/winequality-white.csv",
// Loader options: Label is read from column 11 and Features from columns 0-10;
// ';' is the field separator and the file starts with a header row.
// NOTE(review): R4 is presumably the 4-byte floating point kind; confirm against the loader's type names.
loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"
};

public static TestDataset msm = new TestDataset
{
// REVIEW: Why is the MSM train set smaller than the test set? Reverse these!
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@

<ItemGroup>
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="FastTreeNative" />
</ItemGroup>
</Project>

0 comments on commit d51321c

Please sign in to comment.