Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup of charset usage in Response #5807

Merged
merged 5 commits into from
Dec 21, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 138 additions & 124 deletions jetty-server/src/main/java/org/eclipse/jetty/server/Response.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,39 @@ public enum OutputType

private enum EncodingFrom
{
NOT_SET, INFERRED, SET_LOCALE, SET_CONTENT_TYPE, SET_CHARACTER_ENCODING
/**
* Character encoding was not set, or the encoding was cleared with {@code setCharacterEncoding(null)}.
*/
NOT_SET,

/**
* Using the default character encoding from the context, otherwise ISO-8859-1.
*/
DEFAULT,

/**
* Character encoding was inferred from the Content-Type and will be added as a parameter to the Content-Type.
*/
INFERRED,

/**
* The default character encoding of the locale was used after a call to {@link #setLocale(Locale)}.
*/
SET_LOCALE,

/**
* The character encoding has been explicitly set using the Content-Type charset parameter with {@link #setContentType(String)}.
*/
SET_CONTENT_TYPE,

/**
* The character encoding has been explicitly set using {@link #setCharacterEncoding(String)}.
*/
SET_CHARACTER_ENCODING
}

private static final EnumSet<EncodingFrom> __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE);
private static final EnumSet<EncodingFrom> __explicitCharset = EnumSet.of(EncodingFrom.SET_LOCALE, EncodingFrom.SET_CHARACTER_ENCODING);
private static final EnumSet<EncodingFrom> __localeOverride = EnumSet.of(EncodingFrom.NOT_SET, EncodingFrom.DEFAULT, EncodingFrom.INFERRED, EncodingFrom.SET_LOCALE);
private static final EnumSet<EncodingFrom> __explicitCharset = EnumSet.of(EncodingFrom.SET_LOCALE, EncodingFrom.SET_CHARACTER_ENCODING, EncodingFrom.SET_CONTENT_TYPE);

public Response(HttpChannel channel, HttpOutput out)
{
Expand Down Expand Up @@ -754,26 +782,61 @@ public void setStatusWithReason(int sc, String message)
@Override
public String getCharacterEncoding()
{
if (_characterEncoding == null)
return getCharacterEncoding(false);
}

/**
* Private utility method to get the character encoding.
* A standard call to {@link #getCharacterEncoding()} should not change the Content-Type header.
* But when {@link #getWriter()} is called we must decide what Content-Type to use, so this will allow an inferred
* charset to be set in the Content-Type.
* @param setContentType if true allow the Content-Type header to be changed if character encoding was inferred or the default encoding was used.
* @return the character encoding for this response.
*/
private String getCharacterEncoding(boolean setContentType)
{
// First try explicit char encoding.
if (_characterEncoding != null)
return _characterEncoding;

String encoding;

// Try charset from mime type.
if (_mimeType != null && _mimeType.isCharsetAssumed())
return _mimeType.getCharsetString();

// Try charset assumed from content type (assumed charsets are not added to content type header).
encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType);
if (encoding != null)
return encoding;

// Try char set inferred from content type.
encoding = MimeTypes.getCharsetInferredFromContentType(_contentType);
if (encoding != null)
{
String encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType);
if (encoding != null)
return encoding;
if (setContentType)
setCharacterEncoding(encoding, EncodingFrom.INFERRED);
return encoding;
}

encoding = MimeTypes.getCharsetInferredFromContentType(_contentType);
// Try any default char encoding for the context.
Context context = _channel.getRequest().getContext();
if (context != null)
{
encoding = context.getResponseCharacterEncoding();
if (encoding != null)
return encoding;

Context context = _channel.getRequest().getContext();
if (context != null)
{
encoding = context.getResponseCharacterEncoding();
if (encoding != null)
return encoding;
if (setContentType)
setCharacterEncoding(encoding, EncodingFrom.DEFAULT);
return encoding;
}
return StringUtil.__ISO_8859_1;
}
return _characterEncoding;

// Fallback to last resort iso-8859-1.
encoding = StringUtil.__ISO_8859_1;
if (setContentType)
setCharacterEncoding(encoding, EncodingFrom.DEFAULT);
return encoding;
}

@Override
Expand Down Expand Up @@ -814,46 +877,8 @@ public PrintWriter getWriter() throws IOException

if (_outputType == OutputType.NONE)
{
//first try explicit char encoding
String encoding = _characterEncoding;

//try char set from mime type
if (encoding == null)
{
if (_mimeType != null && _mimeType.isCharsetAssumed())
encoding = _mimeType.getCharsetString();
}

//try char set assumed from content type
if (encoding == null)
{
encoding = MimeTypes.getCharsetAssumedFromContentType(_contentType);
}

//try char set inferred from content type
if (encoding == null)
{
encoding = MimeTypes.getCharsetInferredFromContentType(_contentType);
setCharacterEncoding(encoding, EncodingFrom.INFERRED);
}

//try any default char encoding for the context
if (encoding == null)
{
Context context = _channel.getRequest().getContext();
if (context != null)
encoding = context.getResponseCharacterEncoding();
}

//fallback to last resort iso-8859-1
if (encoding == null)
{
encoding = StringUtil.__ISO_8859_1;
setCharacterEncoding(encoding, EncodingFrom.INFERRED);
}

String encoding = getCharacterEncoding(true);
Locale locale = getLocale();

if (_writer != null && _writer.isFor(locale, encoding))
_writer.reopen();
else
Expand All @@ -866,7 +891,7 @@ else if (StringUtil.__UTF8.equalsIgnoreCase(encoding))
_writer = new ResponseWriter(new EncodingHttpWriter(_out, encoding), locale, encoding);
}

// Set the output type at the end, because setCharacterEncoding() checks for it
// Set the output type at the end, because setCharacterEncoding() checks for it.
_outputType = OutputType.WRITER;
}
return _writer;
Expand Down Expand Up @@ -988,54 +1013,47 @@ public void setCharacterEncoding(String encoding)

private void setCharacterEncoding(String encoding, EncodingFrom from)
{
if (!isMutable() || isWriting())
if (!isMutable() || isWriting() || isCommitted())
return;

if (_outputType != OutputType.WRITER && !isCommitted())
if (encoding == null)
{
if (encoding == null)
_encodingFrom = EncodingFrom.NOT_SET;
if (_characterEncoding != null)
{
_encodingFrom = EncodingFrom.NOT_SET;

// Clear any encoding.
if (_characterEncoding != null)
{
_characterEncoding = null;

if (_mimeType != null)
{
_mimeType = _mimeType.getBaseType();
_contentType = _mimeType.asString();
_fields.put(_mimeType.getContentTypeField());
}
else if (_contentType != null)
{
_contentType = MimeTypes.getContentTypeWithoutCharset(_contentType);
_fields.put(HttpHeader.CONTENT_TYPE, _contentType);
}
}
}
else
{
// No, so just add this one to the mimetype
_encodingFrom = from;
_characterEncoding = HttpGenerator.__STRICT ? encoding : StringUtil.normalizeCharset(encoding);
_characterEncoding = null;
if (_mimeType != null)
{
_contentType = _mimeType.getBaseType().asString() + ";charset=" + _characterEncoding;
_mimeType = MimeTypes.CACHE.get(_contentType);
if (_mimeType == null || HttpGenerator.__STRICT)
_fields.put(HttpHeader.CONTENT_TYPE, _contentType);
else
_fields.put(_mimeType.getContentTypeField());
_mimeType = _mimeType.getBaseType();
_contentType = _mimeType.asString();
_fields.put(_mimeType.getContentTypeField());
}
else if (_contentType != null)
{
_contentType = MimeTypes.getContentTypeWithoutCharset(_contentType) + ";charset=" + _characterEncoding;
_contentType = MimeTypes.getContentTypeWithoutCharset(_contentType);
_fields.put(HttpHeader.CONTENT_TYPE, _contentType);
}
}
}
else
{
_encodingFrom = from;
_characterEncoding = HttpGenerator.__STRICT ? encoding : StringUtil.normalizeCharset(encoding);
if (_mimeType != null)
{
_contentType = _mimeType.getBaseType().asString() + ";charset=" + _characterEncoding;
_mimeType = MimeTypes.CACHE.get(_contentType);
if (_mimeType == null || HttpGenerator.__STRICT)
_fields.put(HttpHeader.CONTENT_TYPE, _contentType);
else
_fields.put(_mimeType.getContentTypeField());
}
else if (_contentType != null)
{
_contentType = MimeTypes.getContentTypeWithoutCharset(_contentType) + ";charset=" + _characterEncoding;
_fields.put(HttpHeader.CONTENT_TYPE, _contentType);
}
}
}

@Override
Expand Down Expand Up @@ -1070,18 +1088,8 @@ public void setContentType(String contentType)
{
case NOT_SET:
break;
case DEFAULT:
case INFERRED:
if (isWriting())
{
_contentType = _contentType + ";charset=" + _characterEncoding;
_mimeType = MimeTypes.CACHE.get(_contentType);
}
else
{
_encodingFrom = EncodingFrom.NOT_SET;
_characterEncoding = null;
}
break;
case SET_CONTENT_TYPE:
case SET_LOCALE:
case SET_CHARACTER_ENCODING:
Expand Down Expand Up @@ -1281,8 +1289,7 @@ public void setTrailerFields(Supplier<Map<String, String>> trailers)

protected MetaData.Response newResponseMetaData()
{
MetaData.Response info = new MetaData.Response(_channel.getRequest().getHttpVersion(), getStatus(), getReason(), _fields, getLongContentLength(), getTrailers());
return info;
return new MetaData.Response(_channel.getRequest().getHttpVersion(), getStatus(), getReason(), _fields, getLongContentLength(), getTrailers());
}

/**
Expand Down Expand Up @@ -1315,22 +1322,32 @@ public boolean isCommitted()
@Override
public void setLocale(Locale locale)
{
if (locale == null || isCommitted() || !isMutable())
if (isCommitted() || !isMutable())
return;

_locale = locale;
_fields.put(HttpHeader.CONTENT_LANGUAGE, StringUtil.replace(locale.toString(), '_', '-'));

if (_outputType != OutputType.NONE)
return;
if (locale == null)
{
_locale = null;
_fields.remove(HttpHeader.CONTENT_LANGUAGE);
if (_encodingFrom == EncodingFrom.SET_LOCALE)
setCharacterEncoding(null, EncodingFrom.NOT_SET);
}
else
{
_locale = locale;
_fields.put(HttpHeader.CONTENT_LANGUAGE, StringUtil.replace(locale.toString(), '_', '-'));

if (_channel.getRequest().getContext() == null)
return;
if (_outputType != OutputType.NONE)
return;

String charset = _channel.getRequest().getContext().getContextHandler().getLocaleEncoding(locale);
Context context = _channel.getRequest().getContext();
if (context == null)
return;

if (charset != null && charset.length() > 0 && __localeOverride.contains(_encodingFrom))
setCharacterEncoding(charset, EncodingFrom.SET_LOCALE);
String charset = context.getContextHandler().getLocaleEncoding(locale);
if (!StringUtil.isEmpty(charset) && __localeOverride.contains(_encodingFrom))
setCharacterEncoding(charset, EncodingFrom.SET_LOCALE);
}
}

@Override
Expand Down Expand Up @@ -1388,20 +1405,17 @@ else if (contentLength > NO_CONTENT_LENGTH)
HttpField ct = content.getContentType();
if (ct != null)
{
if (_characterEncoding != null &&
content.getCharacterEncoding() == null &&
content.getContentTypeValue() != null &&
__explicitCharset.contains(_encodingFrom))
{
setContentType(MimeTypes.getContentTypeWithoutCharset(content.getContentTypeValue()));
}
else
if (!__explicitCharset.contains(_encodingFrom))
{
_fields.put(ct);
_contentType = ct.getValue();
_characterEncoding = content.getCharacterEncoding();
_mimeType = content.getMimeType();
}
else
{
setContentType(content.getContentTypeValue());
}
}

HttpField ce = content.getContentEncoding();
Expand Down