Skip to content

Commit

Permalink
Fixes danfickle#588, fixes danfickle#564, changes danfickle#568, rela…
Browse files Browse the repository at this point in the history
…tes danfickle#550 - Only filter out known problematic characters

Filtering cannot be too aggressive, as some fonts contain private-use-area code points, etc., and expect them to be output.
  • Loading branch information
danfickle committed Oct 30, 2020
1 parent ed2bd9c commit 9ffd0e4
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ public interface TextRenderer {
*
* @param input The string can be null
* @return The cleaned string or <code>null</code> if the input is null
* @see com.openhtmltopdf.util.OpenUtil#isCodePointPrintable(int)
* @see com.openhtmltopdf.util.OpenUtil#isSafeFontCodePointToPrint(int)
*/
static String getEffectivePrintableString(String input) {
public static String getEffectivePrintableString(String input) {
if (input == null || input.isEmpty() || areAllCharactersPrintable(input)) {
return input;
}

StringBuilder effective = new StringBuilder(input.length());
input.codePoints().filter(OpenUtil::isCodePointPrintable).forEach(effective::appendCodePoint);
input.codePoints().filter(OpenUtil::isSafeFontCodePointToPrint).forEach(effective::appendCodePoint);

return effective.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ private OpenUtil() {}
* Checks if a code point is printable. If false, it can be safely discarded at the
* rendering stage, else it should be replaced with the replacement character,
* if a suitable glyph can not be found.
*
* NOTE: This should only be called after a character has been shown to be
* NOT present in the font. It can not be called beforehand because some fonts
* contain private area characters and so on. Issue#588.
*
* @param codePoint
* @return whether codePoint is printable
*/
Expand All @@ -26,14 +31,31 @@ public static boolean isCodePointPrintable(int codePoint) {
category == Character.SURROGATE);
}

/**
 * Decides whether a code point may be handed to the font for rendering.
 * <p>
 * Only code points that have been shown to cause trouble in some (broken)
 * fonts are rejected, for example a soft hyphen that such fonts draw as a
 * visible glyph. Every other code point is let through.
 *
 * @param codePoint the Unicode code point to check
 * @return <code>false</code> for U+00AD (soft hyphen, PR#550) and U+FFFC
 *         (object replacement character, Issue#564), <code>true</code>
 *         for any other value
 */
public static boolean isSafeFontCodePointToPrint(int codePoint) {
    final boolean softHyphen = codePoint == 0xAD;           // PR#550
    final boolean objectReplacement = codePoint == 0xFFFC;  // Issue#564

    return !(softHyphen || objectReplacement);
}

/**
* Returns <code>true</code>, when all characters of the given string are printable.
* @param str a non-null string to test
* @return whether all characters are printable
*/
public static boolean areAllCharactersPrintable(String str) {
Objects.requireNonNull(str, "str");
return str.codePoints().allMatch(OpenUtil::isCodePointPrintable);
return str.codePoints().allMatch(OpenUtil::isSafeFontCodePointToPrint);
}

public static Integer parseIntegerOrNull(String possibleInteger) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import com.openhtmltopdf.simple.extend.ReplacedElementScaleHelper;
import com.openhtmltopdf.util.ArrayUtil;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.XRLog;
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2D;
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2DFontTextDrawer;
Expand Down Expand Up @@ -74,10 +73,6 @@
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable;

public class PdfBoxFastOutputDevice extends AbstractOutputDevice implements OutputDevice, PdfBoxOutputDevice {
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.stream.IntStream;

import com.openhtmltopdf.bidi.BidiReorderer;
import com.openhtmltopdf.extend.FontContext;
Expand All @@ -35,11 +34,10 @@
import com.openhtmltopdf.render.FSFontMetrics;
import com.openhtmltopdf.render.JustificationInfo;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.ThreadCtx;
import com.openhtmltopdf.util.XRLog;

import static com.openhtmltopdf.util.OpenUtil.isCodePointPrintable;

public class PdfBoxTextRenderer implements TextRenderer {
private static float TEXT_MEASURING_DELTA = 0.01f;

Expand Down Expand Up @@ -194,7 +192,9 @@ public static List<FontRun> divideIntoFontRuns(FSFont font, String str, BidiReor
i += Character.charCount(unicode);
String ch = String.valueOf(Character.toChars(unicode));

if (!isCodePointPrintable(unicode)) {
if (!OpenUtil.isSafeFontCodePointToPrint(unicode)) {
// Filter out characters that should never be visible (such
// as soft-hyphen) but are in some fonts.
continue;
}

Expand Down Expand Up @@ -264,8 +264,14 @@ else if (des != current.des) {
}
}
}

if (!gotChar) {
if (!OpenUtil.isCodePointPrintable(unicode)) {
// Filter out control, etc characters when they
// are not present in any font.
continue;
}

// We still don't have the character after all that. So use replacement character.
if (current.des == null) {
// First character of run.
Expand Down Expand Up @@ -366,6 +372,8 @@ public int getSmoothingLevel() {
return 0;
}

/**
 * Does nothing: the method body is empty, so the supplied level is ignored.
 * NOTE(review): marked deprecated, but the replacement API (if any) is not
 * visible here - confirm before pointing callers elsewhere.
 *
 * @param level the requested smoothing level; unused by this implementation
 */
@Deprecated
@Override
public void setSmoothingLevel(int level) {
}

Expand Down

0 comments on commit 9ffd0e4

Please sign in to comment.