Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reintroduce an Exception type for invalid UTF-8 #10553

Merged
merged 3 commits into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ private static void decodeUtf8To(String query, int offset, int length, BiConsume
switch (c)
{
case '&':
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
if (key != null)
{
adder.accept(key, value);
Expand All @@ -347,7 +347,7 @@ else if (value != null && value.length() > 0)
buffer.append(c);
break;
}
key = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid key: Bad UTF-8"));
key = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
break;

case '+':
Expand All @@ -363,7 +363,7 @@ else if (value != null && value.length() > 0)
}
else
{
throw new IllegalArgumentException("Incomplete % encoding");
throw new Utf8StringBuilder.Utf8IllegalArgumentException();
}
break;

Expand All @@ -375,7 +375,7 @@ else if (value != null && value.length() > 0)

if (key != null)
{
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
adder.accept(key, value);
}
else if (buffer.length() > 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public String parse(ByteBuffer buf)
if (parseByte(b))
{
state = State.START;
return utf.takeCompleteString(() -> new IllegalArgumentException("Bad UTF-8"));
return utf.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
}
}
// have not reached end of line (yet)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,16 +387,29 @@ public <X extends Throwable> String takePartialString(Supplier<X> onCodingError)
@Override
public String build() throws CharacterCodingException
{
return takeCompleteString(Utf8StringBuilder::newUtf8CharacterCodingException);
return takeCompleteString(Utf8CharacterCodingException::new);
}

private static CharacterCodingException newUtf8CharacterCodingException()
public static class Utf8CharacterCodingException extends CharacterCodingException
{
return new CharacterCodingException()
@Override
public String getMessage()
{
{
initCause(new IllegalArgumentException("Bad UTF-8 encoding"));
}
};
return "Invalid UTF-8";
}

@Override
public String toString()
{
return "%s@%x: Invalid UTF-8".formatted(CharacterCodingException.class.getSimpleName(), hashCode());
}
}

public static class Utf8IllegalArgumentException extends IllegalArgumentException
{
public Utf8IllegalArgumentException()
{
super(new Utf8CharacterCodingException());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.junit.jupiter.params.provider.MethodSource;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand All @@ -42,7 +43,7 @@
public class Utf8StringBuilderTest
{
@Test
public void testUtf() throws Exception
public void testUtf()
{
String source = "abcd012345\n\r\u0000¤჻\ufffdjetty";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -56,7 +57,7 @@ public void testUtf() throws Exception
}

@Test
public void testUtf8WithMissingByte() throws Exception
public void testUtf8WithMissingByte()
{
String source = "abc჻";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -71,7 +72,7 @@ public void testUtf8WithMissingByte() throws Exception
}

@Test
public void testUtf8WithAdditionalByte() throws Exception
public void testUtf8WithAdditionalByte()
{
String source = "abcXX";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -88,7 +89,7 @@ public void testUtf8WithAdditionalByte() throws Exception
}

@Test
public void testUTF32codes() throws Exception
public void testUTF32codes()
{
String source = "\uD842\uDF9F";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -103,7 +104,7 @@ public void testUTF32codes() throws Exception
}

@Test
public void testGermanUmlauts() throws Exception
public void testGermanUmlauts()
{
byte[] bytes = new byte[6];
bytes[0] = (byte)0xC3;
Expand All @@ -121,18 +122,27 @@ public void testGermanUmlauts() throws Exception
}

@Test
public void testInvalidUTF8() throws Exception
public void testInvalidUTF8()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();
utf8.append((byte)0xC2); // start of sequence
utf8.append((byte)0xC2); // start of another sequence
assertThat(utf8.toPartialString(), equalTo("�")); // only first sequence is reported as BAD
assertThat(utf8.toCompleteString(), equalTo("��")); // now both sequences are reported as BAD
assertThrows(CharacterCodingException.class, utf8::build);

try
{
utf8.build();
}
catch (Throwable t)
{
assertThat(t.toString(), containsString("Invalid UTF-8"));
}
}

@Test
public void testInvalidZeroUTF8() throws Exception
public void testInvalidZeroUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
Expand All @@ -144,7 +154,7 @@ public void testInvalidZeroUTF8() throws Exception
}

@Test
public void testInvalidAlternateDotEncodingUTF8() throws Exception
public void testInvalidAlternateDotEncodingUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
Expand All @@ -160,7 +170,7 @@ public void testInvalidAlternateDotEncodingUTF8() throws Exception
}

@Test
public void testFastFail1() throws Exception
public void testFastFail1()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5");
byte[] part2 = StringUtil.fromHexString("f4908080"); // INVALID
Expand All @@ -178,7 +188,7 @@ public void testFastFail1() throws Exception
}

@Test
public void testFastFail2() throws Exception
public void testFastFail2()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5f4");
byte[] part2 = StringUtil.fromHexString("90"); // INVALID
Expand All @@ -196,7 +206,7 @@ public void testFastFail2() throws Exception
}

@Test
public void testPartialSplitSingleCodepoint() throws Exception
public void testPartialSplitSingleCodepoint()
{
// GOTHIC LETTER HWAIR
final String gothicUnicode = "𐍈";
Expand All @@ -223,7 +233,7 @@ public void testPartialSplitSingleCodepoint() throws Exception
}

@Test
public void testPartialUnsplitCodepoint() throws Exception
public void testPartialUnsplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand All @@ -245,7 +255,7 @@ public void testPartialUnsplitCodepoint() throws Exception
}

@Test
public void testPartialSplitCodepoint() throws Exception
public void testPartialSplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand All @@ -267,7 +277,7 @@ public void testPartialSplitCodepoint() throws Exception
}

@Test
public void testPartialSplitCodepointWithNoBuf() throws Exception
public void testPartialSplitCodepointWithNoBuf()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ public CloseStatus(ByteBuffer payload)
Utf8StringBuilder utf = new Utf8StringBuilder();
// if this throws, we know we have bad UTF8
utf.append(reasonBytes, 0, reasonBytes.length);
String reason = utf.takeCompleteString(() -> new BadPayloadException("Invalid UTF8 in CLOSE Reason"));
String reason = utf.takeCompleteString(BadPayloadException.InvalidUtf8::new);

this.code = statusCode;
this.reason = reason;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

package org.eclipse.jetty.websocket.core.exception;

import org.eclipse.jetty.util.Utf8StringBuilder;
import org.eclipse.jetty.websocket.core.CloseStatus;

/**
Expand All @@ -38,4 +39,12 @@ public BadPayloadException(Throwable t)
{
super(CloseStatus.BAD_PAYLOAD, t);
}

public static class InvalidUtf8 extends BadPayloadException
{
public InvalidUtf8()
{
super("Invalid UTF-8", new Utf8StringBuilder.Utf8CharacterCodingException());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,13 @@ protected void onTextFrame(Frame frame, Callback callback)

if (frame.isFin())
{
onText(textBuffer.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")), callback);
onText(textBuffer.takeCompleteString(BadPayloadException.InvalidUtf8::new), callback);
textBuffer.reset();
}
else
{
if (textBuffer.hasCodingErrors())
throw new BadPayloadException("Invalid UTF-8");
throw new BadPayloadException.InvalidUtf8();
else
callback.succeeded();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ public void accept(Frame frame, Callback callback)

if (frame.isFin())
{
String complete = accumulator.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8"));
String complete = accumulator.takeCompleteString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(complete, true);
}
else
{
String partial = accumulator.takePartialString(() -> new BadPayloadException("Invalid UTF-8"));
String partial = accumulator.takePartialString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(partial, false);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void accept(Frame frame, Callback callback)

if (frame.isFin())
{
getMethodHandle().invoke(out.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")));
getMethodHandle().invoke(out.takeCompleteString(BadPayloadException.InvalidUtf8::new));
callback.succeeded();
autoDemand();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ public void testUtf8Continuation() throws Exception
// Check decoding
Utf8StringBuilder check = new Utf8StringBuilder();
check.append(utf8Bytes, 0, 2);
String partial = check.takePartialString(IllegalStateException::new);
String partial = check.takePartialString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(partial, equalTo(""));
check.append(utf8Bytes, 2, 2);
String complete = check.takeCompleteString(IllegalStateException::new);
String complete = check.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(complete, equalTo(gothicUnicode));

FutureCallback callback = new FutureCallback();
Expand Down Expand Up @@ -142,7 +142,7 @@ public void testInvalidMultiFrameUtf8() throws Exception

public static class OnMessageEndpoint
{
private BlockingArrayQueue<List<String>> messages;
private final BlockingArrayQueue<List<String>> messages;

public OnMessageEndpoint()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public void testNotCESU8() throws Exception

Utf8StringBuilder buf = new Utf8StringBuilder();
buf.append(BufferUtil.toArray(_bytes), 0, _bytes.remaining());
assertEquals(data, buf.takeCompleteString(IllegalArgumentException::new));
assertEquals(data, buf.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new));
}

@Test
Expand Down