-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] Return both Joda and Java formats from structure finder #33900
Changes from 2 commits
69cd457
5ab3050
67904cb
81981ee
82efc3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,7 +99,8 @@ public String toString() { | |
public static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields"); | ||
public static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); | ||
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field"); | ||
public static final ParseField TIMESTAMP_FORMATS = new ParseField("timestamp_formats"); | ||
public static final ParseField JODA_TIMESTAMP_FORMATS = new ParseField("joda_timestamp_formats"); | ||
public static final ParseField JAVA_TIMESTAMP_FORMATS = new ParseField("java_timestamp_formats"); | ||
public static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone"); | ||
public static final ParseField MAPPINGS = new ParseField("mappings"); | ||
public static final ParseField FIELD_STATS = new ParseField("field_stats"); | ||
|
@@ -123,7 +124,8 @@ public String toString() { | |
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS); | ||
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN); | ||
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD); | ||
PARSER.declareStringArray(Builder::setTimestampFormats, TIMESTAMP_FORMATS); | ||
PARSER.declareStringArray(Builder::setJodaTimestampFormats, JODA_TIMESTAMP_FORMATS); | ||
PARSER.declareStringArray(Builder::setJavaTimestampFormats, JAVA_TIMESTAMP_FORMATS); | ||
PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE); | ||
PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS); | ||
PARSER.declareObject(Builder::setFieldStats, (p, c) -> { | ||
|
@@ -150,7 +152,8 @@ public String toString() { | |
private final Character quote; | ||
private final Boolean shouldTrimFields; | ||
private final String grokPattern; | ||
private final List<String> timestampFormats; | ||
private final List<String> jodaTimestampFormats; | ||
private final List<String> javaTimestampFormats; | ||
private final String timestampField; | ||
private final boolean needClientTimezone; | ||
private final SortedMap<String, Object> mappings; | ||
|
@@ -160,8 +163,9 @@ public String toString() { | |
public FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker, | ||
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> columnNames, | ||
Boolean hasHeaderRow, Character delimiter, Character quote, Boolean shouldTrimFields, String grokPattern, | ||
String timestampField, List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings, | ||
Map<String, FieldStats> fieldStats, List<String> explanation) { | ||
String timestampField, List<String> jodaTimestampFormats, List<String> javaTimestampFormats, | ||
boolean needClientTimezone, Map<String, Object> mappings, Map<String, FieldStats> fieldStats, | ||
List<String> explanation) { | ||
|
||
this.numLinesAnalyzed = numLinesAnalyzed; | ||
this.numMessagesAnalyzed = numMessagesAnalyzed; | ||
|
@@ -178,7 +182,10 @@ public FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampl | |
this.shouldTrimFields = shouldTrimFields; | ||
this.grokPattern = grokPattern; | ||
this.timestampField = timestampField; | ||
this.timestampFormats = (timestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(timestampFormats)); | ||
this.jodaTimestampFormats = | ||
(jodaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(jodaTimestampFormats)); | ||
this.javaTimestampFormats = | ||
(javaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(javaTimestampFormats)); | ||
this.needClientTimezone = needClientTimezone; | ||
this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings)); | ||
this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats)); | ||
|
@@ -200,7 +207,8 @@ public FileStructure(StreamInput in) throws IOException { | |
quote = in.readBoolean() ? (char) in.readVInt() : null; | ||
shouldTrimFields = in.readOptionalBoolean(); | ||
grokPattern = in.readOptionalString(); | ||
timestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; | ||
jodaTimestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; | ||
javaTimestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null; | ||
timestampField = in.readOptionalString(); | ||
needClientTimezone = in.readBoolean(); | ||
mappings = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap())); | ||
|
@@ -239,11 +247,17 @@ public void writeTo(StreamOutput out) throws IOException { | |
} | ||
out.writeOptionalBoolean(shouldTrimFields); | ||
out.writeOptionalString(grokPattern); | ||
if (timestampFormats == null) { | ||
if (jodaTimestampFormats == null) { | ||
out.writeBoolean(false); | ||
} else { | ||
out.writeBoolean(true); | ||
out.writeCollection(timestampFormats, StreamOutput::writeString); | ||
out.writeCollection(jodaTimestampFormats, StreamOutput::writeString); | ||
} | ||
if (javaTimestampFormats == null) { | ||
out.writeBoolean(false); | ||
} else { | ||
out.writeBoolean(true); | ||
out.writeCollection(javaTimestampFormats, StreamOutput::writeString); | ||
} | ||
out.writeOptionalString(timestampField); | ||
out.writeBoolean(needClientTimezone); | ||
|
@@ -312,8 +326,12 @@ public String getTimestampField() { | |
return timestampField; | ||
} | ||
|
||
public List<String> getTimestampFormats() { | ||
return timestampFormats; | ||
public List<String> getJodaTimestampFormats() { | ||
return jodaTimestampFormats; | ||
} | ||
|
||
public List<String> getJavaTimestampFormats() { | ||
return javaTimestampFormats; | ||
} | ||
|
||
public boolean needClientTimezone() { | ||
|
@@ -371,8 +389,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws | |
if (timestampField != null && timestampField.isEmpty() == false) { | ||
builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField); | ||
} | ||
if (timestampFormats != null && timestampFormats.isEmpty() == false) { | ||
builder.field(TIMESTAMP_FORMATS.getPreferredName(), timestampFormats); | ||
if (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false) { | ||
builder.field(JODA_TIMESTAMP_FORMATS.getPreferredName(), jodaTimestampFormats); | ||
} | ||
if (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false) { | ||
builder.field(JAVA_TIMESTAMP_FORMATS.getPreferredName(), javaTimestampFormats); | ||
} | ||
builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone); | ||
builder.field(MAPPINGS.getPreferredName(), mappings); | ||
|
@@ -396,7 +417,7 @@ public int hashCode() { | |
|
||
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, | ||
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern, | ||
timestampField, timestampFormats, needClientTimezone, mappings, fieldStats, explanation); | ||
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation); | ||
} | ||
|
||
@Override | ||
|
@@ -413,7 +434,6 @@ public boolean equals(Object other) { | |
FileStructure that = (FileStructure) other; | ||
return this.numLinesAnalyzed == that.numLinesAnalyzed && | ||
this.numMessagesAnalyzed == that.numMessagesAnalyzed && | ||
this.needClientTimezone == that.needClientTimezone && | ||
Objects.equals(this.sampleStart, that.sampleStart) && | ||
Objects.equals(this.charset, that.charset) && | ||
Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) && | ||
|
@@ -427,7 +447,9 @@ public boolean equals(Object other) { | |
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) && | ||
Objects.equals(this.grokPattern, that.grokPattern) && | ||
Objects.equals(this.timestampField, that.timestampField) && | ||
Objects.equals(this.timestampFormats, that.timestampFormats) && | ||
Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) && | ||
Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) && | ||
this.needClientTimezone == that.needClientTimezone && | ||
Objects.equals(this.mappings, that.mappings) && | ||
Objects.equals(this.fieldStats, that.fieldStats) && | ||
Objects.equals(this.explanation, that.explanation); | ||
|
@@ -450,7 +472,8 @@ public static class Builder { | |
private Boolean shouldTrimFields; | ||
private String grokPattern; | ||
private String timestampField; | ||
private List<String> timestampFormats; | ||
private List<String> jodaTimestampFormats; | ||
private List<String> javaTimestampFormats; | ||
private boolean needClientTimezone; | ||
private Map<String, Object> mappings; | ||
private Map<String, FieldStats> fieldStats = Collections.emptyMap(); | ||
|
@@ -539,8 +562,13 @@ public Builder setTimestampField(String timestampField) { | |
return this; | ||
} | ||
|
||
public Builder setTimestampFormats(List<String> timestampFormats) { | ||
this.timestampFormats = timestampFormats; | ||
public Builder setJodaTimestampFormats(List<String> jodaTimestampFormats) { | ||
this.jodaTimestampFormats = jodaTimestampFormats; | ||
return this; | ||
} | ||
|
||
public Builder setJavaTimestampFormats(List<String> javaTimestampFormats) { | ||
this.javaTimestampFormats = javaTimestampFormats; | ||
return this; | ||
} | ||
|
||
|
@@ -652,8 +680,14 @@ public FileStructure build() { | |
throw new IllegalStateException("enum value [" + format + "] missing from switch."); | ||
} | ||
|
||
if ((timestampField == null) != (timestampFormats == null || timestampFormats.isEmpty())) { | ||
throw new IllegalArgumentException("Timestamp field and timestamp formats must both be specified or neither be specified."); | ||
if ((timestampField == null) != (jodaTimestampFormats == null || jodaTimestampFormats.isEmpty())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: No need to change but I think this would read more easily if it was:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be equivalent it would have to say:
I could introduce some local variables and make it:
Or I could split it over two What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the local variables approach would definitely make it clear enough. |
||
throw new IllegalArgumentException( | ||
"Timestamp field and Joda timestamp formats must both be specified or neither be specified."); | ||
} | ||
|
||
if ((timestampField == null) != (javaTimestampFormats == null || javaTimestampFormats.isEmpty())) { | ||
throw new IllegalArgumentException( | ||
"Timestamp field and Java timestamp formats must both be specified or neither be specified."); | ||
} | ||
|
||
if (needClientTimezone && timestampField == null) { | ||
|
@@ -670,7 +704,7 @@ public FileStructure build() { | |
|
||
return new FileStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, | ||
multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern, | ||
timestampField, timestampFormats, needClientTimezone, mappings, fieldStats, explanation); | ||
timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reading through the code, I wonder if it would make things simpler to encapsulate this in a
TimestampFormat
class which will then own the joda/java pairs (or additional types if the future brings them). That would create that sort of 1-1 mapping between the alternatives. Just a thoughtThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not convinced that at the level of individual timestamp formats we want the Joda/Java formats grouped into pairs.
The output of the endpoint can currently contain something like this:
As a consumer of this information you'd want either the Joda formats or the Java formats, never both. Suppose the endpoint returned something like this:
That would be very annoying to use for somebody who wanted either the two Joda formats or the two Java formats. All consumers would end up having to write their own transformation function to get back to a list of formats of the same type.
So the other way to avoid the doubled method calls in a few places would be to have a
TimestampFormats
(plural) container that contains both lists of formats. The problem I foresee with this is that eventually we'll want to completely remove the Joda formats, so then the class will only have one member and will just be an annoyance.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see your point. Sounds good!