Skip to content

Commit

Permalink
Reintroduce an Exception type for invalid UTF-8 (#10553)
Browse files Browse the repository at this point in the history
Introduce `Utf8CharacterCodingException`  and `Utf8IllegalArgumentException` as a substitutes for the removed `Utf8Appendable.NotUtf8Exception`.

* Updates from review
  • Loading branch information
gregw authored Sep 22, 2023
1 parent 812d65d commit 57b953b
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ private static void decodeUtf8To(String query, int offset, int length, BiConsume
switch (c)
{
case '&':
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
if (key != null)
{
adder.accept(key, value);
Expand All @@ -347,7 +347,7 @@ else if (value != null && value.length() > 0)
buffer.append(c);
break;
}
key = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid key: Bad UTF-8"));
key = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
break;

case '+':
Expand All @@ -363,7 +363,7 @@ else if (value != null && value.length() > 0)
}
else
{
throw new IllegalArgumentException("Incomplete % encoding");
throw new Utf8StringBuilder.Utf8IllegalArgumentException();
}
break;

Expand All @@ -375,7 +375,7 @@ else if (value != null && value.length() > 0)

if (key != null)
{
value = buffer.takeCompleteString(() -> new IllegalArgumentException("Invalid value: Bad UTF-8"));
value = buffer.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
adder.accept(key, value);
}
else if (buffer.length() > 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public String parse(ByteBuffer buf)
if (parseByte(b))
{
state = State.START;
return utf.takeCompleteString(() -> new IllegalArgumentException("Bad UTF-8"));
return utf.takeCompleteString(Utf8StringBuilder.Utf8IllegalArgumentException::new);
}
}
// have not reached end of line (yet)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,16 +387,29 @@ public <X extends Throwable> String takePartialString(Supplier<X> onCodingError)
@Override
public String build() throws CharacterCodingException
{
return takeCompleteString(Utf8StringBuilder::newUtf8CharacterCodingException);
return takeCompleteString(Utf8CharacterCodingException::new);
}

private static CharacterCodingException newUtf8CharacterCodingException()
public static class Utf8CharacterCodingException extends CharacterCodingException
{
return new CharacterCodingException()
@Override
public String getMessage()
{
{
initCause(new IllegalArgumentException("Bad UTF-8 encoding"));
}
};
return "Invalid UTF-8";
}

@Override
public String toString()
{
return "%s@%x: Invalid UTF-8".formatted(CharacterCodingException.class.getSimpleName(), hashCode());
}
}

public static class Utf8IllegalArgumentException extends IllegalArgumentException
{
public Utf8IllegalArgumentException()
{
super(new Utf8CharacterCodingException());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.junit.jupiter.params.provider.MethodSource;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand All @@ -42,7 +43,7 @@
public class Utf8StringBuilderTest
{
@Test
public void testUtf() throws Exception
public void testUtf()
{
String source = "abcd012345\n\r\u0000¤჻\ufffdjetty";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -56,7 +57,7 @@ public void testUtf() throws Exception
}

@Test
public void testUtf8WithMissingByte() throws Exception
public void testUtf8WithMissingByte()
{
String source = "abc჻";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -71,7 +72,7 @@ public void testUtf8WithMissingByte() throws Exception
}

@Test
public void testUtf8WithAdditionalByte() throws Exception
public void testUtf8WithAdditionalByte()
{
String source = "abcXX";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -88,7 +89,7 @@ public void testUtf8WithAdditionalByte() throws Exception
}

@Test
public void testUTF32codes() throws Exception
public void testUTF32codes()
{
String source = "\uD842\uDF9F";
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
Expand All @@ -103,7 +104,7 @@ public void testUTF32codes() throws Exception
}

@Test
public void testGermanUmlauts() throws Exception
public void testGermanUmlauts()
{
byte[] bytes = new byte[6];
bytes[0] = (byte)0xC3;
Expand All @@ -121,18 +122,27 @@ public void testGermanUmlauts() throws Exception
}

@Test
public void testInvalidUTF8() throws Exception
public void testInvalidUTF8()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();
utf8.append((byte)0xC2); // start of sequence
utf8.append((byte)0xC2); // start of another sequence
assertThat(utf8.toPartialString(), equalTo("�")); // only first sequence is reported as BAD
assertThat(utf8.toCompleteString(), equalTo("��")); // now both sequences are reported as BAD
assertThrows(CharacterCodingException.class, utf8::build);

try
{
utf8.build();
}
catch (Throwable t)
{
assertThat(t.toString(), containsString("Invalid UTF-8"));
}
}

@Test
public void testInvalidZeroUTF8() throws Exception
public void testInvalidZeroUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
Expand All @@ -144,7 +154,7 @@ public void testInvalidZeroUTF8() throws Exception
}

@Test
public void testInvalidAlternateDotEncodingUTF8() throws Exception
public void testInvalidAlternateDotEncodingUTF8()
{
// From https://datatracker.ietf.org/doc/html/rfc3629#section-10
Utf8StringBuilder utf8 = new Utf8StringBuilder();
Expand All @@ -160,7 +170,7 @@ public void testInvalidAlternateDotEncodingUTF8() throws Exception
}

@Test
public void testFastFail1() throws Exception
public void testFastFail1()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5");
byte[] part2 = StringUtil.fromHexString("f4908080"); // INVALID
Expand All @@ -178,7 +188,7 @@ public void testFastFail1() throws Exception
}

@Test
public void testFastFail2() throws Exception
public void testFastFail2()
{
byte[] part1 = StringUtil.fromHexString("cebae1bdb9cf83cebcceb5f4");
byte[] part2 = StringUtil.fromHexString("90"); // INVALID
Expand All @@ -196,7 +206,7 @@ public void testFastFail2() throws Exception
}

@Test
public void testPartialSplitSingleCodepoint() throws Exception
public void testPartialSplitSingleCodepoint()
{
// GOTHIC LETTER HWAIR
final String gothicUnicode = "𐍈";
Expand All @@ -223,7 +233,7 @@ public void testPartialSplitSingleCodepoint() throws Exception
}

@Test
public void testPartialUnsplitCodepoint() throws Exception
public void testPartialUnsplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand All @@ -245,7 +255,7 @@ public void testPartialUnsplitCodepoint() throws Exception
}

@Test
public void testPartialSplitCodepoint() throws Exception
public void testPartialSplitCodepoint()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand All @@ -267,7 +277,7 @@ public void testPartialSplitCodepoint() throws Exception
}

@Test
public void testPartialSplitCodepointWithNoBuf() throws Exception
public void testPartialSplitCodepointWithNoBuf()
{
Utf8StringBuilder utf8 = new Utf8StringBuilder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ public CloseStatus(ByteBuffer payload)
Utf8StringBuilder utf = new Utf8StringBuilder();
// if this throws, we know we have bad UTF8
utf.append(reasonBytes, 0, reasonBytes.length);
String reason = utf.takeCompleteString(() -> new BadPayloadException("Invalid UTF8 in CLOSE Reason"));
String reason = utf.takeCompleteString(BadPayloadException.InvalidUtf8::new);

this.code = statusCode;
this.reason = reason;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

package org.eclipse.jetty.websocket.core.exception;

import org.eclipse.jetty.util.Utf8StringBuilder;
import org.eclipse.jetty.websocket.core.CloseStatus;

/**
Expand All @@ -38,4 +39,12 @@ public BadPayloadException(Throwable t)
{
super(CloseStatus.BAD_PAYLOAD, t);
}

public static class InvalidUtf8 extends BadPayloadException
{
public InvalidUtf8()
{
super("Invalid UTF-8", new Utf8StringBuilder.Utf8CharacterCodingException());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,13 @@ protected void onTextFrame(Frame frame, Callback callback)

if (frame.isFin())
{
onText(textBuffer.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")), callback);
onText(textBuffer.takeCompleteString(BadPayloadException.InvalidUtf8::new), callback);
textBuffer.reset();
}
else
{
if (textBuffer.hasCodingErrors())
throw new BadPayloadException("Invalid UTF-8");
throw new BadPayloadException.InvalidUtf8();
else
callback.succeeded();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ public void accept(Frame frame, Callback callback)

if (frame.isFin())
{
String complete = accumulator.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8"));
String complete = accumulator.takeCompleteString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(complete, true);
}
else
{
String partial = accumulator.takePartialString(() -> new BadPayloadException("Invalid UTF-8"));
String partial = accumulator.takePartialString(BadPayloadException.InvalidUtf8::new);
getMethodHandle().invoke(partial, false);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void accept(Frame frame, Callback callback)

if (frame.isFin())
{
getMethodHandle().invoke(out.takeCompleteString(() -> new BadPayloadException("Invalid UTF-8")));
getMethodHandle().invoke(out.takeCompleteString(BadPayloadException.InvalidUtf8::new));
callback.succeeded();
autoDemand();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ public void testUtf8Continuation() throws Exception
// Check decoding
Utf8StringBuilder check = new Utf8StringBuilder();
check.append(utf8Bytes, 0, 2);
String partial = check.takePartialString(IllegalStateException::new);
String partial = check.takePartialString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(partial, equalTo(""));
check.append(utf8Bytes, 2, 2);
String complete = check.takeCompleteString(IllegalStateException::new);
String complete = check.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new);
assertThat(complete, equalTo(gothicUnicode));

FutureCallback callback = new FutureCallback();
Expand Down Expand Up @@ -142,7 +142,7 @@ public void testInvalidMultiFrameUtf8() throws Exception

public static class OnMessageEndpoint
{
private BlockingArrayQueue<List<String>> messages;
private final BlockingArrayQueue<List<String>> messages;

public OnMessageEndpoint()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public void testNotCESU8() throws Exception

Utf8StringBuilder buf = new Utf8StringBuilder();
buf.append(BufferUtil.toArray(_bytes), 0, _bytes.remaining());
assertEquals(data, buf.takeCompleteString(IllegalArgumentException::new));
assertEquals(data, buf.takeCompleteString(Utf8StringBuilder.Utf8CharacterCodingException::new));
}

@Test
Expand Down

0 comments on commit 57b953b

Please sign in to comment.