-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added stringtodate and datetostring UDFs (#1851)
- Loading branch information
1 parent
965e1e4
commit d425cae
Showing
9 changed files
with
407 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
ksql-engine/src/main/java/io/confluent/ksql/function/udf/datetime/DateToString.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package io.confluent.ksql.function.udf.datetime; | ||
|
||
import com.google.common.cache.CacheBuilder; | ||
import com.google.common.cache.CacheLoader; | ||
import com.google.common.cache.LoadingCache; | ||
import io.confluent.ksql.function.udf.Udf; | ||
import io.confluent.ksql.function.udf.UdfDescription; | ||
import java.time.LocalDate; | ||
import java.time.format.DateTimeFormatter; | ||
|
||
@UdfDescription(name = "datetostring", author = "Confluent", | ||
description = "Converts an integer representing days since epoch to a date string using the given format pattern." | ||
+ " Note this is the format Kafka Connect uses to represent dates with no time component." | ||
+ " The format pattern should be in the format expected by java.time.format.DateTimeFormatter") | ||
public class DateToString { | ||
|
||
private final LoadingCache<String, DateTimeFormatter> formatters = | ||
CacheBuilder.newBuilder() | ||
.maximumSize(1000) | ||
.build(CacheLoader.from(DateTimeFormatter::ofPattern)); | ||
|
||
@Udf(description = "Converts an integer representing days since epoch to a string using the given format pattern." | ||
+ " The format pattern should be in the format expected by java.time.format.DateTimeFormatter") | ||
public String dateToString(final Integer daysSinceEpoch, final String formatPattern) { | ||
final DateTimeFormatter formatter = formatters.getUnchecked(formatPattern); | ||
return LocalDate.ofEpochDay(daysSinceEpoch).format(formatter); | ||
} | ||
|
||
} |
31 changes: 31 additions & 0 deletions
31
ksql-engine/src/main/java/io/confluent/ksql/function/udf/datetime/StringToDate.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package io.confluent.ksql.function.udf.datetime; | ||
|
||
import com.google.common.cache.CacheBuilder; | ||
import com.google.common.cache.CacheLoader; | ||
import com.google.common.cache.LoadingCache; | ||
import io.confluent.ksql.function.udf.Udf; | ||
import io.confluent.ksql.function.udf.UdfDescription; | ||
import java.time.LocalDate; | ||
import java.time.format.DateTimeFormatter; | ||
|
||
@UdfDescription(name = "stringtodate", author = "Confluent", | ||
description = "Converts a string representation of a date into an integer representing" | ||
+ " days since epoch using the given format pattern." | ||
+ " Note this is the format Kafka Connect uses to represent dates with no time component." | ||
+ " The format pattern should be in the format expected by java.time.format.DateTimeFormatter") | ||
public class StringToDate { | ||
|
||
private final LoadingCache<String, DateTimeFormatter> formatters = | ||
CacheBuilder.newBuilder() | ||
.maximumSize(1000) | ||
.build(CacheLoader.from(DateTimeFormatter::ofPattern)); | ||
|
||
@Udf(description = "Converts a string representation of a date into an integer representing" | ||
+ " days since epoch using the given format pattern." | ||
+ " The format pattern should be in the format expected by java.time.format.DateTimeFormatter") | ||
public int stringToDate(final String formattedDate, final String formatPattern) { | ||
DateTimeFormatter formatter = formatters.getUnchecked(formatPattern); | ||
return ((int)LocalDate.parse(formattedDate, formatter).toEpochDay()); | ||
} | ||
|
||
} |
127 changes: 127 additions & 0 deletions
127
ksql-engine/src/test/java/io/confluent/ksql/function/udf/datetime/DateToStringTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* | ||
* Copyright 2018 Confluent Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.confluent.ksql.function.udf.datetime; | ||
|
||
import static org.hamcrest.CoreMatchers.is; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.fail; | ||
|
||
import com.google.common.util.concurrent.UncheckedExecutionException; | ||
import io.confluent.ksql.function.KsqlFunctionException; | ||
import java.text.ParseException; | ||
import java.text.SimpleDateFormat; | ||
import java.util.Calendar; | ||
import java.util.Date; | ||
import java.util.TimeZone; | ||
import java.util.stream.IntStream; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.rules.ExpectedException; | ||
|
||
public class DateToStringTest { | ||
|
||
private static final long MILLIS_PER_DAY = 24 * 60 * 60 * 1000; | ||
private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); | ||
|
||
private DateToString udf; | ||
|
||
@Rule | ||
public final ExpectedException expectedException = ExpectedException.none(); | ||
|
||
@Before | ||
public void setUp(){ | ||
udf = new DateToString(); | ||
} | ||
|
||
@Test | ||
public void shouldConvertDateToString() { | ||
// When: | ||
final String result = udf.dateToString(16383, "yyyy-MM-dd"); | ||
|
||
// Then: | ||
final String expectedResult = expectedResult(16383, "yyyy-MM-dd"); | ||
assertThat(result, is(expectedResult)); | ||
} | ||
|
||
@Test | ||
public void shouldRoundTripWithStringToDate() { | ||
final String format = "dd/MM/yyyy'Freya'"; | ||
final StringToDate stringToDate = new StringToDate(); | ||
IntStream.range(-10_000, 20_000) | ||
.parallel() | ||
.forEach(idx -> { | ||
final String result = udf.dateToString(idx, format); | ||
final String expectedResult = expectedResult(idx, format); | ||
assertThat(result, is(expectedResult)); | ||
|
||
final int daysSinceEpoch = stringToDate.stringToDate(result, format); | ||
assertThat(daysSinceEpoch, is(idx)); | ||
}); | ||
} | ||
|
||
@Test | ||
public void shouldSupportEmbeddedChars() { | ||
// When: | ||
final Object result = udf.dateToString(12345, "yyyy-dd-MM'Fred'"); | ||
|
||
// Then: | ||
final String expectedResult = expectedResult(12345, "yyyy-dd-MM'Fred'"); | ||
assertThat(result, is(expectedResult)); | ||
} | ||
|
||
@Test | ||
public void shouldThrowIfFormatInvalid() { | ||
expectedException.expect(UncheckedExecutionException.class); | ||
expectedException.expectMessage("Unknown pattern letter: i"); | ||
udf.dateToString(44444, "invalid"); | ||
} | ||
|
||
@Test | ||
public void shouldBeThreadSafe() { | ||
IntStream.range(0, 10_000) | ||
.parallel() | ||
.forEach(idx -> { | ||
shouldConvertDateToString(); | ||
udf.dateToString(55555, "yyyy-MM-dd"); | ||
}); | ||
} | ||
|
||
@Test | ||
public void shouldWorkWithManyDifferentFormatters() { | ||
IntStream.range(0, 10_000) | ||
.parallel() | ||
.forEach(idx -> { | ||
try { | ||
final String pattern = "yyyy-MM-dd'X" + idx + "'"; | ||
final String result = udf.dateToString(idx, pattern); | ||
final String expectedResult = expectedResult(idx, pattern); | ||
assertThat(result, is(expectedResult)); | ||
} catch (final Exception e) { | ||
Assert.fail(e.getMessage()); | ||
} | ||
}); | ||
} | ||
|
||
private String expectedResult(final int daysSinceEpoch, final String formatPattern) { | ||
SimpleDateFormat dateFormat = new SimpleDateFormat(formatPattern); | ||
dateFormat.setCalendar(Calendar.getInstance(UTC)); | ||
return dateFormat.format(new java.util.Date(daysSinceEpoch * MILLIS_PER_DAY)); | ||
} | ||
|
||
} |
130 changes: 130 additions & 0 deletions
130
ksql-engine/src/test/java/io/confluent/ksql/function/udf/datetime/StringToDateTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
/* | ||
* Copyright 2018 Confluent Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.confluent.ksql.function.udf.datetime; | ||
|
||
import static org.hamcrest.CoreMatchers.is; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.fail; | ||
|
||
import com.google.common.util.concurrent.UncheckedExecutionException; | ||
import io.confluent.ksql.function.KsqlFunctionException; | ||
import java.text.ParseException; | ||
import java.text.SimpleDateFormat; | ||
import java.time.format.DateTimeParseException; | ||
import java.util.Calendar; | ||
import java.util.Date; | ||
import java.util.Locale; | ||
import java.util.TimeZone; | ||
import java.util.stream.IntStream; | ||
import org.apache.kafka.connect.errors.DataException; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
public class StringToDateTest { | ||
|
||
private static final long MILLIS_PER_DAY = 24 * 60 * 60 * 1000; | ||
private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); | ||
|
||
private StringToDate udf; | ||
|
||
@Before | ||
public void setUp(){ | ||
udf = new StringToDate(); | ||
} | ||
|
||
@Test | ||
public void shouldConvertStringToDate() throws ParseException { | ||
// When: | ||
final int result = udf.stringToDate("2021-12-01", "yyyy-MM-dd"); | ||
|
||
// Then: | ||
final int expectedResult = expectedResult("2021-12-01", "yyyy-MM-dd"); | ||
assertThat(result, is(expectedResult)); | ||
} | ||
|
||
@Test | ||
public void shouldSupportEmbeddedChars() throws ParseException { | ||
// When: | ||
final Object result = udf.stringToDate("2021-12-01Fred", "yyyy-MM-dd'Fred'"); | ||
|
||
// Then: | ||
final int expectedResult = expectedResult("2021-12-01Fred", "yyyy-MM-dd'Fred'"); | ||
assertThat(result, is(expectedResult)); | ||
} | ||
|
||
@Test(expected = UncheckedExecutionException.class) | ||
public void shouldThrowIfFormatInvalid() { | ||
udf.stringToDate("2021-12-01", "invalid"); | ||
} | ||
|
||
@Test(expected = DateTimeParseException.class) | ||
public void shouldThrowIfParseFails() { | ||
udf.stringToDate("invalid", "yyyy-MM-dd"); | ||
} | ||
|
||
@Test(expected = DateTimeParseException.class) | ||
public void shouldThrowOnEmptyString() { | ||
udf.stringToDate("", "yyyy-MM-dd"); | ||
} | ||
|
||
@Test | ||
public void shouldBeThreadSafe() { | ||
IntStream.range(0, 10_000) | ||
.parallel() | ||
.forEach(idx -> { | ||
try { | ||
shouldConvertStringToDate(); | ||
} catch (final Exception e) { | ||
Assert.fail(e.getMessage()); | ||
} | ||
udf.stringToDate("1988-01-12", "yyyy-MM-dd"); | ||
}); | ||
} | ||
|
||
@Test | ||
public void shouldWorkWithManyDifferentFormatters() { | ||
IntStream.range(0, 10_000) | ||
.parallel() | ||
.forEach(idx -> { | ||
try { | ||
final String sourceDate = "2021-12-01X" + idx; | ||
final String pattern = "yyyy-MM-dd'X" + idx + "'"; | ||
final int result = udf.stringToDate(sourceDate, pattern); | ||
final int expectedResult = expectedResult(sourceDate, pattern); | ||
assertThat(result, is(expectedResult)); | ||
} catch (final Exception e) { | ||
Assert.fail(e.getMessage()); | ||
} | ||
}); | ||
} | ||
|
||
|
||
private int expectedResult(final String formattedDate, final String formatPattern) throws ParseException { | ||
SimpleDateFormat dateFormat = new SimpleDateFormat(formatPattern); | ||
dateFormat.setCalendar(Calendar.getInstance(UTC)); | ||
Date parsedDate = dateFormat.parse(formattedDate); | ||
Calendar calendar = Calendar.getInstance(UTC); | ||
calendar.setTime(parsedDate); | ||
if (calendar.get(Calendar.HOUR_OF_DAY) != 0 || calendar.get(Calendar.MINUTE) != 0 || | ||
calendar.get(Calendar.SECOND) != 0 || calendar.get(Calendar.MILLISECOND) != 0) { | ||
fail("Date should not have any time fields set to non-zero values."); | ||
} | ||
return (int)(calendar.getTimeInMillis() / MILLIS_PER_DAY); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.