Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle invalid accept-charset in requests - default to utf-8 #584

Merged
merged 6 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,19 @@ final case class HttpCharset private[http] (override val value: String)(val alia
/** Returns the Charset for this charset if available or throws an exception otherwise */
def nioCharset: Charset = _nioCharset.get

/**
* @return this HttpCharset instance if this charset can be parsed to a
* <code>java.nio.charset.Charset</code> instance, otherwise returns the UTF-8 charset.
* @since 1.1.0
*/
def charsetWithUtf8Failover: HttpCharset = {
if (_nioCharset.isSuccess) {
this
} else {
HttpCharsets.`UTF-8`
}
}

private def readObject(in: java.io.ObjectInputStream): Unit = {
in.defaultReadObject()
_nioCharset = HttpCharset.findNioCharset(value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,23 @@ class MarshallingSpec extends AnyFreeSpec with Matchers with BeforeAndAfterAll w
}
}

"The PredefinedToEntityMarshallers" - {
"StringMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in {
val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid")))
val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader)
val responseEntity = marshalToResponse("Ha“llo", request).entity
responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`)
responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain`
}
"CharArrayMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in {
val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid")))
val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader)
val responseEntity = marshalToResponse("Ha“llo".toCharArray(), request).entity
responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`)
responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain`
}
}

"The PredefinedToResponseMarshallers" - {
"fromStatusCode should properly marshal a status code that doesn't allow an entity" in {
marshalToResponse(StatusCodes.NoContent) shouldEqual HttpResponse(StatusCodes.NoContent,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside {
rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`)))
}
}
"reject JSON rendering if an `Accept-Charset` request header requests an unknown encoding" in {
Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check {
rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`)))
}
}
val acceptHeaderUtf = Accept.parseFromValueString("application/json;charset=utf8").right.get
val acceptHeaderNonUtf = Accept.parseFromValueString("application/json;charset=ISO-8859-1").right.get
"render JSON response when `Accept` header is present with the `charset` parameter ignoring it" in {
Expand All @@ -275,4 +280,43 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside {
}
}
}

"The marshalling infrastructure for text" should {
val foo = "Hällö"
"render text with UTF-8 encoding if no `Accept-Charset` request header is present" in {
Get() ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo)
}
}
"render text with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in {
Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `ISO-8859-1`), foo)
}
}
"render text with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" in {
Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo)
}
}
Comment on lines +291 to +300
Copy link
Member

@raboof raboof Aug 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize this is a comment on the current behavior, and not on your change, but this seems strange to me: it looks like the default string marshaller will return the string passed to it as text/plain claiming it is in whatever charset that was requested by the user, without doing anything to make sure the string is actually in that charset.

This is only correct if you assume the implementation correctly took into account the charset, which seems extremely unlikely. I wonder if it wouldn't make more sense to leave out the optional charset parameter to the return content type in this case. If people have specific requirements (which seems somewhat unlikely) they can specify the charset explicitly.

This would be a behavior change, but it seems like a reasonable one if we note it in the release notes.

Copy link
Contributor Author

@pjfanning pjfanning Aug 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

before the change to the main source code in this PR, this test actually fails - the response is an error response without my main source change

Copy link

@michaelf-crwd michaelf-crwd Aug 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

Thanks for having a look at this bug / feature request. I recently sent in a duplicate issue (#583) unaware of this one. I worked on a similar PR the past two days, and have now found and looked through your changes - that would seem to fix it nicely! Great.

For your consideration, I'd like to suggest adding in an extra unit-test in "MarshallingSpec.scala" with something along the lines of:

val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid")))
val request = HttpRequest().withHeaders(Seq(invalidAcceptCharsetHeader))
marshalToResponse("Hello", request).entity.contentType.charsetOption should be(Some(HttpCharsets.`UTF-8`))

Some refactoring thoughts; Perhaps the try-catch in "PredefinedToEntityMarshallers.scala" could be more subtly placed in "HttpCharset.scala" as:

/** Returns this HttpCharset instance if the provided nio charset is available or a desired default otherwise */
  def withNioCharsetOrElse(default: HttpCharset): HttpCharset = {
    if (_nioCharset.isSuccess) {
      // Return this instance, as the provided nioCharset did not result in a java.nio.charset.UnsupportedCharsetException
      this
    } else {
      default
    }
  }

and then have the "PredefinedToEntityMarshallers.scala" do:

Marshaller.withOpenCharset(mediaType) { (s, cs) => {
  HttpEntity(mediaType.withCharset(cs.withNioCharsetOrElse(HttpCharsets.`UTF-8`)), s)
} }

Looking forward to seeing your changes merged.

Thanks again and best regards,
Michael

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @michaelf-crwd - I have added a new HttpCharsets function similar to what you suggested (c8514e7).
I will look later at adding extra tests like the one you suggested.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@raboof would you be able to take a look at this again? I don't think this change breaks anything and makes the pekko-http support for Accept-Charset a bit more tolerant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

without doing anything to make sure the string is actually in that charset

@raboof, WDYM with that? Java strings are basically UTF-16 encoded, so they are known to be unicode. What else could go wrong?

Copy link
Member

@raboof raboof Sep 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what I meant is, when the route does complete(someUnicodeString), and the marshalling logic results in a Content-Type: text/plain; charset=ISO-8859-1 header, that response seems wrong: the header promises the body will be in ISO-8859-1, but the body is actually in unicode. I think it would make more sense to make sure we produce a Content-Type: text/plain; charset=UTF-8 header unless the developer explicitly marked the response as being in a different charset.

Copy link
Member

@raboof raboof Sep 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or will the marshalling logic actually convert the unicode input into bytes using the charset from the content type? in that case of course it's all good.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed it does, sorry about the noise 🤦

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}

"The marshalling infrastructure for text/xml" should {
val foo = <foo>Hällö</foo>
"render XML with UTF-8 encoding if no `Accept-Charset` request header is present" in {
Get() ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString)
}
}
"render XML with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in {
Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `ISO-8859-1`), foo.toString)
}
}
"render XML with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" ignore {
// still returns Content-Type: text/xml; charset=unknown
Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check {
responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers {
implicit val CharArrayMarshaller: ToEntityMarshaller[Array[Char]] = charArrayMarshaller(`text/plain`)
def charArrayMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[Array[Char]] =
Marshaller.withOpenCharset(mediaType) { (value, charset) =>
marshalCharArray(value, mediaType.withCharset(charset))
// https://github.com/apache/pekko-http/issues/300
// ignore issues with invalid charset - use UTF-8 instead
marshalCharArray(value, mediaType.withCharset(charset.charsetWithUtf8Failover))
}
def charArrayMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[Array[Char]] =
Marshaller.withFixedContentType(mediaType) { value => marshalCharArray(value, mediaType) }
Expand All @@ -55,7 +57,11 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers {

implicit val StringMarshaller: ToEntityMarshaller[String] = stringMarshaller(`text/plain`)
def stringMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[String] =
Marshaller.withOpenCharset(mediaType) { (s, cs) => HttpEntity(mediaType.withCharset(cs), s) }
Marshaller.withOpenCharset(mediaType) { (s, cs) =>
// https://github.com/apache/pekko-http/issues/300
// ignore issues with invalid charset - use UTF-8 instead
HttpEntity(mediaType.withCharset(cs.charsetWithUtf8Failover), s)
}
def stringMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[String] =
Marshaller.withFixedContentType(mediaType) { s => HttpEntity(mediaType, s) }

Expand Down
Loading