From 5490b3add4d8e896f365e9922fa70883944d825e Mon Sep 17 00:00:00 2001 From: Lachlan Roberts Date: Tue, 8 Dec 2020 09:32:26 +1100 Subject: [PATCH 1/5] Issue #5757 - improve javadoc around inferred and assumed charsets Signed-off-by: Lachlan Roberts --- .../org/eclipse/jetty/server/Response.java | 105 ++++++++++-------- 1 file changed, 60 insertions(+), 45 deletions(-) diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java index c86a134d9919..abd1cfa0d746 100644 --- a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java +++ b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java @@ -103,7 +103,31 @@ public enum OutputType private enum EncodingFrom { - NOT_SET, INFERRED, SET_LOCALE, SET_CONTENT_TYPE, SET_CHARACTER_ENCODING + /** + * Character encoding was not set, or the encoding was cleared with {@code setCharacterEncoding(null)}. + */ + NOT_SET, + + /** + * Character encoding was not explicitly set but has a default value defined by the {@code encoding.properties} file. + * @see MimeTypes#getInferredEncodings(). + */ + INFERRED, + + /** + * The default character encoding of the locale was used after a call to {@link #setLocale(Locale)}. + */ + SET_LOCALE, + + /** + * The character encoding has been explicitly set using the Content-Type charset parameter with {@link #setContentType(String)}. + */ + SET_CONTENT_TYPE, + + /** + * The character encoding has been explicitly set using {@link #setCharacterEncoding(String)}. + */ + SET_CHARACTER_ENCODING } private static final EnumSet __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE); @@ -814,30 +838,30 @@ public PrintWriter getWriter() throws IOException if (_outputType == OutputType.NONE) { - //first try explicit char encoding + // First try explicit char encoding. String encoding = _characterEncoding; - //try char set from mime type + // Try charset from mime type. if (encoding == null) { if (_mimeType != null && _mimeType.isCharsetAssumed()) encoding = _mimeType.getCharsetString(); } - //try char set assumed from content type + // Try charset assumed from content type. if (encoding == null) { encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType); } - //try char set inferred from content type + // Try char set inferred from content type. if (encoding == null) { encoding = MimeTypes.getCharsetInferredFromContentType(_contentType); setCharacterEncoding(encoding, EncodingFrom.INFERRED); } - //try any default char encoding for the context + // Try any default char encoding for the context. if (encoding == null) { Context context = _channel.getRequest().getContext(); @@ -845,7 +869,7 @@ public PrintWriter getWriter() throws IOException encoding = context.getResponseCharacterEncoding(); } - //fallback to last resort iso-8859-1 + // Fallback to last resort iso-8859-1. if (encoding == null) { encoding = StringUtil.__ISO_8859_1; @@ -853,7 +877,6 @@ public PrintWriter getWriter() throws IOException } Locale locale = getLocale(); - if (_writer != null && _writer.isFor(locale, encoding)) _writer.reopen(); else @@ -866,7 +889,7 @@ else if (StringUtil.__UTF8.equalsIgnoreCase(encoding)) _writer = new ResponseWriter(new EncodingHttpWriter(_out, encoding), locale, encoding); } - // Set the output type at the end, because setCharacterEncoding() checks for it + // Set the output type at the end, because setCharacterEncoding() checks for it. _outputType = OutputType.WRITER; } return _writer; @@ -988,54 +1011,47 @@ public void setCharacterEncoding(String encoding) private void setCharacterEncoding(String encoding, EncodingFrom from) { - if (!isMutable() || isWriting()) + if (!isMutable() || isWriting() || isCommitted()) return; - if (_outputType != OutputType.WRITER && !isCommitted()) + if (encoding == null) { - if (encoding == null) + _encodingFrom = EncodingFrom.NOT_SET; + if (_characterEncoding != null) { - _encodingFrom = EncodingFrom.NOT_SET; - - // Clear any encoding. - if (_characterEncoding != null) - { - _characterEncoding = null; - - if (_mimeType != null) - { - _mimeType = _mimeType.getBaseType(); - _contentType = _mimeType.asString(); - _fields.put(_mimeType.getContentTypeField()); - } - else if (_contentType != null) - { - _contentType = MimeTypes.getContentTypeWithoutCharset(_contentType); - _fields.put(HttpHeader.CONTENT_TYPE, _contentType); - } - } - } - else - { - // No, so just add this one to the mimetype - _encodingFrom = from; - _characterEncoding = HttpGenerator.__STRICT ? encoding : StringUtil.normalizeCharset(encoding); + _characterEncoding = null; if (_mimeType != null) { - _contentType = _mimeType.getBaseType().asString() + ";charset=" + _characterEncoding; - _mimeType = MimeTypes.CACHE.get(_contentType); - if (_mimeType == null || HttpGenerator.__STRICT) - _fields.put(HttpHeader.CONTENT_TYPE, _contentType); - else - _fields.put(_mimeType.getContentTypeField()); + _mimeType = _mimeType.getBaseType(); + _contentType = _mimeType.asString(); + _fields.put(_mimeType.getContentTypeField()); } else if (_contentType != null) { - _contentType = MimeTypes.getContentTypeWithoutCharset(_contentType) + ";charset=" + _characterEncoding; + _contentType = MimeTypes.getContentTypeWithoutCharset(_contentType); _fields.put(HttpHeader.CONTENT_TYPE, _contentType); } } } + else + { + _encodingFrom = from; + _characterEncoding = HttpGenerator.__STRICT ? encoding : StringUtil.normalizeCharset(encoding); + if (_mimeType != null) + { + _contentType = _mimeType.getBaseType().asString() + ";charset=" + _characterEncoding; + _mimeType = MimeTypes.CACHE.get(_contentType); + if (_mimeType == null || HttpGenerator.__STRICT) + _fields.put(HttpHeader.CONTENT_TYPE, _contentType); + else + _fields.put(_mimeType.getContentTypeField()); + } + else if (_contentType != null) + { + _contentType = MimeTypes.getContentTypeWithoutCharset(_contentType) + ";charset=" + _characterEncoding; + _fields.put(HttpHeader.CONTENT_TYPE, _contentType); + } + } } @Override @@ -1328,7 +1344,6 @@ public void setLocale(Locale locale) return; String charset = _channel.getRequest().getContext().getContextHandler().getLocaleEncoding(locale); - if (charset != null && charset.length() > 0 && __localeOverride.contains(_encodingFrom)) setCharacterEncoding(charset, EncodingFrom.SET_LOCALE); } From f5fab09498f7c9f4af7deb0ba57389657df85e0c Mon Sep 17 00:00:00 2001 From: Lachlan Roberts Date: Wed, 9 Dec 2020 15:25:46 +1100 Subject: [PATCH 2/5] Issue #5757 - cleanup logic around character encoding in Response Signed-off-by: Lachlan Roberts --- .../org/eclipse/jetty/server/Response.java | 131 ++++++++---------- 1 file changed, 59 insertions(+), 72 deletions(-) diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java index abd1cfa0d746..dd5ccf46f008 100644 --- a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java +++ b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java @@ -131,7 +131,7 @@ private enum EncodingFrom } private static final EnumSet __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE); - private static final EnumSet __explicitCharset = EnumSet.of(EncodingFrom.SET_LOCALE, EncodingFrom.SET_CHARACTER_ENCODING); + private static final EnumSet __explicitCharset = EnumSet.of(EncodingFrom.SET_LOCALE, EncodingFrom.SET_CHARACTER_ENCODING, EncodingFrom.SET_CONTENT_TYPE); public Response(HttpChannel channel, HttpOutput out) { @@ -778,26 +778,40 @@ public void setStatusWithReason(int sc, String message) @Override public String getCharacterEncoding() { - if (_characterEncoding == null) + // First try explicit char encoding. + if (_characterEncoding != null) + return _characterEncoding; + + String encoding; + + // Try charset from mime type. + if (_mimeType != null && _mimeType.isCharsetAssumed()) + return _mimeType.getCharsetString(); + + // Try charset assumed from content type. + encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType); + if (encoding != null) + return encoding; + + // Try char set inferred from content type. + encoding = MimeTypes.getCharsetInferredFromContentType(_contentType); + if (encoding != null) { - String encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType); - if (encoding != null) - return encoding; + setCharacterEncoding(encoding, EncodingFrom.INFERRED); + return encoding; + } - encoding = MimeTypes.getCharsetInferredFromContentType(_contentType); + // Try any default char encoding for the context. + Context context = _channel.getRequest().getContext(); + if (context != null) + { + encoding = context.getResponseCharacterEncoding(); if (encoding != null) return encoding; - - Context context = _channel.getRequest().getContext(); - if (context != null) - { - encoding = context.getResponseCharacterEncoding(); - if (encoding != null) - return encoding; - } - return StringUtil.__ISO_8859_1; } - return _characterEncoding; + + // Fallback to last resort iso-8859-1. + return StringUtil.__ISO_8859_1; } @Override @@ -838,44 +852,7 @@ public PrintWriter getWriter() throws IOException if (_outputType == OutputType.NONE) { - // First try explicit char encoding. - String encoding = _characterEncoding; - - // Try charset from mime type. - if (encoding == null) - { - if (_mimeType != null && _mimeType.isCharsetAssumed()) - encoding = _mimeType.getCharsetString(); - } - - // Try charset assumed from content type. - if (encoding == null) - { - encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType); - } - - // Try char set inferred from content type. - if (encoding == null) - { - encoding = MimeTypes.getCharsetInferredFromContentType(_contentType); - setCharacterEncoding(encoding, EncodingFrom.INFERRED); - } - - // Try any default char encoding for the context. - if (encoding == null) - { - Context context = _channel.getRequest().getContext(); - if (context != null) - encoding = context.getResponseCharacterEncoding(); - } - - // Fallback to last resort iso-8859-1. - if (encoding == null) - { - encoding = StringUtil.__ISO_8859_1; - setCharacterEncoding(encoding, EncodingFrom.INFERRED); - } - + String encoding = getCharacterEncoding(); Locale locale = getLocale(); if (_writer != null && _writer.isFor(locale, encoding)) _writer.reopen(); @@ -1331,21 +1308,34 @@ public boolean isCommitted() @Override public void setLocale(Locale locale) { - if (locale == null || isCommitted() || !isMutable()) + if (isCommitted() || !isMutable()) return; - _locale = locale; - _fields.put(HttpHeader.CONTENT_LANGUAGE, StringUtil.replace(locale.toString(), '_', '-')); - - if (_outputType != OutputType.NONE) + if (locale == null) + { + _locale = null; + _fields.remove(HttpHeader.CONTENT_LANGUAGE); + if (_encodingFrom == EncodingFrom.SET_LOCALE) + setCharacterEncoding(null, EncodingFrom.NOT_SET); return; + } + else + { - if (_channel.getRequest().getContext() == null) - return; + _locale = locale; + _fields.put(HttpHeader.CONTENT_LANGUAGE, StringUtil.replace(locale.toString(), '_', '-')); + + if (_outputType != OutputType.NONE) + return; + + Context context = _channel.getRequest().getContext(); + if (context == null) + return; - String charset = _channel.getRequest().getContext().getContextHandler().getLocaleEncoding(locale); - if (charset != null && charset.length() > 0 && __localeOverride.contains(_encodingFrom)) - setCharacterEncoding(charset, EncodingFrom.SET_LOCALE); + String charset = context.getContextHandler().getLocaleEncoding(locale); + if (!StringUtil.isEmpty(charset) && __localeOverride.contains(_encodingFrom)) + setCharacterEncoding(charset, EncodingFrom.SET_LOCALE); + } } @Override @@ -1403,20 +1393,17 @@ else if (contentLength > NO_CONTENT_LENGTH) HttpField ct = content.getContentType(); if (ct != null) { - if (_characterEncoding != null && - content.getCharacterEncoding() == null && - content.getContentTypeValue() != null && - __explicitCharset.contains(_encodingFrom)) - { - setContentType(MimeTypes.getContentTypeWithoutCharset(content.getContentTypeValue())); - } - else + if (!__explicitCharset.contains(_encodingFrom)) { _fields.put(ct); _contentType = ct.getValue(); _characterEncoding = content.getCharacterEncoding(); _mimeType = content.getMimeType(); } + else + { + setContentType(content.getContentTypeValue()); + } } HttpField ce = content.getContentEncoding(); From 908acd59046d9d72e236870aa513a9f5e946b445 Mon Sep 17 00:00:00 2001 From: Lachlan Roberts Date: Mon, 14 Dec 2020 17:22:36 +1100 Subject: [PATCH 3/5] The default iso-8859-1 encoding should be set in the Content-Type. Signed-off-by: Lachlan Roberts --- .../org/eclipse/jetty/server/Response.java | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java index dd5ccf46f008..da8437cb1875 100644 --- a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java +++ b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java @@ -777,6 +777,19 @@ public void setStatusWithReason(int sc, String message) @Override public String getCharacterEncoding() + { + return getCharacterEncoding(false); + } + + /** + * Private utility method to get the character encoding. + * A standard call to {@link #getCharacterEncoding()} should not change the Content-Type header. + * But when {@link #getWriter()} is called we must decide what Content Type to use, so this will allow an inferred + * charset to be set in in the Content-Type. + * @param setContentType set the Content-Type header if this is set to true, otherwise just calculate what it should be. + * @return the character encoding for this response. + */ + private String getCharacterEncoding(boolean setContentType) { // First try explicit char encoding. if (_characterEncoding != null) @@ -784,11 +797,11 @@ public String getCharacterEncoding() String encoding; - // Try charset from mime type. + // Try charset from mime type. TODO: should this be added to Content-Type header? if (_mimeType != null && _mimeType.isCharsetAssumed()) return _mimeType.getCharsetString(); - // Try charset assumed from content type. + // Try charset assumed from content type (assumed charsets are not added to content type header). encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType); if (encoding != null) return encoding; @@ -797,11 +810,12 @@ public String getCharacterEncoding() encoding = MimeTypes.getCharsetInferredFromContentType(_contentType); if (encoding != null) { - setCharacterEncoding(encoding, EncodingFrom.INFERRED); + if (setContentType) + setCharacterEncoding(encoding, EncodingFrom.INFERRED); return encoding; } - // Try any default char encoding for the context. + // Try any default char encoding for the context. TODO: should this be added to Content-Type header? Context context = _channel.getRequest().getContext(); if (context != null) { @@ -811,7 +825,10 @@ public String getCharacterEncoding() } // Fallback to last resort iso-8859-1. - return StringUtil.__ISO_8859_1; + encoding = StringUtil.__ISO_8859_1; + if (setContentType) + setCharacterEncoding(encoding, EncodingFrom.INFERRED); + return encoding; } @Override @@ -852,7 +869,7 @@ public PrintWriter getWriter() throws IOException if (_outputType == OutputType.NONE) { - String encoding = getCharacterEncoding(); + String encoding = getCharacterEncoding(true); Locale locale = getLocale(); if (_writer != null && _writer.isFor(locale, encoding)) _writer.reopen(); From b13fb8ad1e7ca96e7f6594e4c12099315b0e90de Mon Sep 17 00:00:00 2001 From: Lachlan Roberts Date: Tue, 15 Dec 2020 12:21:47 +1100 Subject: [PATCH 4/5] Resolve TODOs and other changes from review. Signed-off-by: Lachlan Roberts --- .../java/org/eclipse/jetty/server/Response.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java index da8437cb1875..93c9293d8288 100644 --- a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java +++ b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java @@ -109,8 +109,7 @@ private enum EncodingFrom NOT_SET, /** - * Character encoding was not explicitly set but has a default value defined by the {@code encoding.properties} file. - * @see MimeTypes#getInferredEncodings(). + * Character encoding was inferred from the Content-Type and will be added as a parameter to the Content-Type. */ INFERRED, @@ -784,9 +783,9 @@ public String getCharacterEncoding() /** * Private utility method to get the character encoding. * A standard call to {@link #getCharacterEncoding()} should not change the Content-Type header. - * But when {@link #getWriter()} is called we must decide what Content Type to use, so this will allow an inferred + * But when {@link #getWriter()} is called we must decide what Content-Type to use, so this will allow an inferred * charset to be set in in the Content-Type. - * @param setContentType set the Content-Type header if this is set to true, otherwise just calculate what it should be. + * @param setContentType if true allow the Content-Type header to be changed if character encoding was inferred. * @return the character encoding for this response. */ private String getCharacterEncoding(boolean setContentType) @@ -797,7 +796,7 @@ private String getCharacterEncoding(boolean setContentType) String encoding; - // Try charset from mime type. TODO: should this be added to Content-Type header? + // Try charset from mime type. if (_mimeType != null && _mimeType.isCharsetAssumed()) return _mimeType.getCharsetString(); @@ -815,13 +814,17 @@ private String getCharacterEncoding(boolean setContentType) return encoding; } - // Try any default char encoding for the context. TODO: should this be added to Content-Type header? + // Try any default char encoding for the context. Context context = _channel.getRequest().getContext(); if (context != null) { encoding = context.getResponseCharacterEncoding(); if (encoding != null) + { + if (setContentType) + setCharacterEncoding(encoding, EncodingFrom.INFERRED); return encoding; + } } // Fallback to last resort iso-8859-1. From 88a838fb176eb749764dc897c4f466d648b30d9d Mon Sep 17 00:00:00 2001 From: Lachlan Roberts Date: Tue, 15 Dec 2020 13:34:26 +1100 Subject: [PATCH 5/5] Introduce extra enum state for DEFAULT encoding. Signed-off-by: Lachlan Roberts --- .../org/eclipse/jetty/server/Response.java | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java index 93c9293d8288..963213bfd988 100644 --- a/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java +++ b/jetty-server/src/main/java/org/eclipse/jetty/server/Response.java @@ -108,6 +108,11 @@ private enum EncodingFrom */ NOT_SET, + /** + * Using the default character encoding from the context otherwise iso-8859-1. + */ + DEFAULT, + /** * Character encoding was inferred from the Content-Type and will be added as a parameter to the Content-Type. */ @@ -129,7 +134,7 @@ private enum EncodingFrom SET_CHARACTER_ENCODING } - private static final EnumSet __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE); + private static final EnumSet __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.DEFAULT, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE); private static final EnumSet __explicitCharset = EnumSet.of(EncodingFrom.SET_LOCALE, EncodingFrom.SET_CHARACTER_ENCODING, EncodingFrom.SET_CONTENT_TYPE); public Response(HttpChannel channel, HttpOutput out) @@ -785,7 +790,7 @@ public String getCharacterEncoding() * A standard call to {@link #getCharacterEncoding()} should not change the Content-Type header. * But when {@link #getWriter()} is called we must decide what Content-Type to use, so this will allow an inferred * charset to be set in in the Content-Type. - * @param setContentType if true allow the Content-Type header to be changed if character encoding was inferred. + * @param setContentType if true allow the Content-Type header to be changed if character encoding was inferred or the default encoding was used. * @return the character encoding for this response. */ private String getCharacterEncoding(boolean setContentType) @@ -822,7 +827,7 @@ private String getCharacterEncoding(boolean setContentType) if (encoding != null) { if (setContentType) - setCharacterEncoding(encoding, EncodingFrom.INFERRED); + setCharacterEncoding(encoding, EncodingFrom.DEFAULT); return encoding; } } @@ -830,7 +835,7 @@ private String getCharacterEncoding(boolean setContentType) // Fallback to last resort iso-8859-1. encoding = StringUtil.__ISO_8859_1; if (setContentType) - setCharacterEncoding(encoding, EncodingFrom.INFERRED); + setCharacterEncoding(encoding, EncodingFrom.DEFAULT); return encoding; } @@ -1083,18 +1088,8 @@ public void setContentType(String contentType) { case NOT_SET: break; + case DEFAULT: case INFERRED: - if (isWriting()) - { - _contentType = _contentType + ";charset=" + _characterEncoding; - _mimeType = MimeTypes.CACHE.get(_contentType); - } - else - { - _encodingFrom = EncodingFrom.NOT_SET; - _characterEncoding = null; - } - break; case SET_CONTENT_TYPE: case SET_LOCALE: case SET_CHARACTER_ENCODING: @@ -1294,8 +1289,7 @@ public void setTrailerFields(Supplier> trailers) protected MetaData.Response newResponseMetaData() { - MetaData.Response info = new MetaData.Response(_channel.getRequest().getHttpVersion(), getStatus(), getReason(), _fields, getLongContentLength(), getTrailers()); - return info; + return new MetaData.Response(_channel.getRequest().getHttpVersion(), getStatus(), getReason(), _fields, getLongContentLength(), getTrailers()); } /** @@ -1337,11 +1331,9 @@ public void setLocale(Locale locale) _fields.remove(HttpHeader.CONTENT_LANGUAGE); if (_encodingFrom == EncodingFrom.SET_LOCALE) setCharacterEncoding(null, EncodingFrom.NOT_SET); - return; } else { - _locale = locale; _fields.put(HttpHeader.CONTENT_LANGUAGE, StringUtil.replace(locale.toString(), '_', '-'));