forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[7.4][ML] Regression dependent variable must be numeric (elastic#46072)
* [ML] Regression dependent variable must be numeric

  This adds a validation that the dependent variable of a regression analysis must be numeric.

* Address review comments and fix some problems

  In addition to addressing the review comments, this commit fixes a few issues I found during testing. In particular:

  - if there were mappings for required fields but they were not included we were not reporting the error
  - if explicitly included fields had unsupported types we were not reporting the error

  Unfortunately, I couldn't get those fixed without refactoring the code in `ExtractedFieldsDetector`.
- Loading branch information
1 parent
5b61708
commit 4d8eff8
Showing
8 changed files
with
326 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
.../core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RequiredField.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
package org.elasticsearch.xpack.core.ml.dataframe.analyses; | ||
|
||
import java.util.Collections; | ||
import java.util.Objects; | ||
import java.util.Set; | ||
import java.util.SortedSet; | ||
import java.util.TreeSet; | ||
|
||
/**
 * Describes a field that is required, together with the set of types that
 * field is allowed to have.
 */
public class RequiredField {

    private final String name;

    /**
     * The required field must have one of those types.
     * We use a sorted set to ensure types are reported alphabetically in error messages.
     */
    private final SortedSet<String> types;

    /**
     * Creates a required field.
     *
     * @param name  the name of the required field
     * @param types the types the field is allowed to have; the set is copied into
     *              a sorted, unmodifiable set, so later changes to the argument
     *              are not reflected in this instance
     * @throws NullPointerException if {@code name} or {@code types} is {@code null}
     */
    public RequiredField(String name, Set<String> types) {
        this.name = Objects.requireNonNull(name, "name must not be null");
        // Check explicitly so the failure names the offending argument instead of
        // surfacing as an anonymous NullPointerException from the TreeSet constructor.
        Objects.requireNonNull(types, "types must not be null");
        this.types = Collections.unmodifiableSortedSet(new TreeSet<>(types));
    }

    /**
     * @return the name of the required field
     */
    public String getName() {
        return name;
    }

    /**
     * @return the allowed types as an unmodifiable set in natural (alphabetical) order
     */
    public SortedSet<String> getTypes() {
        return types;
    }
}
43 changes: 43 additions & 0 deletions
43
...k/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
package org.elasticsearch.xpack.core.ml.dataframe.analyses; | ||
|
||
import org.elasticsearch.index.mapper.NumberFieldMapper; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* Helper class that defines groups of types | ||
*/ | ||
public final class Types { | ||
|
||
private Types() {} | ||
|
||
private static final Set<String> CATEGORICAL_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("text", "keyword", "ip"))); | ||
|
||
private static final Set<String> NUMERICAL_TYPES; | ||
|
||
static { | ||
Set<String> numericalTypes = Stream.of(NumberFieldMapper.NumberType.values()) | ||
.map(NumberFieldMapper.NumberType::typeName) | ||
.collect(Collectors.toSet()); | ||
numericalTypes.add("scaled_float"); | ||
NUMERICAL_TYPES = Collections.unmodifiableSet(numericalTypes); | ||
} | ||
|
||
public static Set<String> categorical() { | ||
return CATEGORICAL_TYPES; | ||
} | ||
|
||
public static Set<String> numerical() { | ||
return NUMERICAL_TYPES; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.