diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index ba00c20494..c6ff21cc96 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -18,6 +18,7 @@
 
 import org.apache.commons.io.FilenameUtils;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
@@ -85,7 +86,7 @@ public enum OUTPUT_TYPE {
     private int enableImageProcessing = 0;
 
     // Path to ImageMagick program, if not on system path.
-    private String ImageMagickPath = "";
+    private String imageMagickPath = "";
 
     // resolution of processed image (in dpi).
     private int density = 300;
@@ -198,14 +199,15 @@ public String getTesseractPath() {
     }
 
     /**
-     * Set the path to the Tesseract executable, needed if it is not on system path.
+     * Set the path to the Tesseract executable's directory, needed if it is not on system path.
      *
      * Note that if you set this value, it is highly recommended that you also
      * set the path to the 'tessdata' folder using {@link #setTessdataPath}.
      *
+     * @throws IllegalArgumentException if the path is null or cannot be normalized
      */
     public void setTesseractPath(String tesseractPath) {
-        this.tesseractPath = FilenameUtils.normalize(tesseractPath);
+        this.tesseractPath = normalizeDirectoryPath("tesseractPath", tesseractPath);
     }
 
     /**
@@ -221,7 +223,9 @@ public String getTessdataPath() {
      * (such as when Tesseract is built from source), it may be located elsewhere.
+     *
+     * @throws IllegalArgumentException if the path is null or cannot be normalized
      */
     public void setTessdataPath(String tessdataPath) {
-        this.tessdataPath = FilenameUtils.normalize(tessdataPath);
+        this.tessdataPath = normalizeDirectoryPath("tessdataPath", tessdataPath);
     }
 
     /**
@@ -515,21 +519,44 @@ public void setResize(int resize) {
     }
 
     /**
-     * @return path to ImageMagick file.
-     * @see #setImageMagickPath(String ImageMagickPath)
+     * @return path to ImageMagick executable directory.
+     * @see #setImageMagickPath(String imageMagickPath)
      */
     public String getImageMagickPath() {
 
-        return ImageMagickPath;
+        return imageMagickPath;
     }
 
     /**
-     * Set the path to the ImageMagick executable, needed if it is not on system path.
+     * Set the path to the ImageMagick executable directory, needed if it is not on system path.
      *
-     * @param ImageMagickPath to ImageMagick file.
+     * @param imageMagickPath to ImageMagick executable directory.
+     * @throws IllegalArgumentException if the path is null or cannot be normalized
      */
-    public void setImageMagickPath(String ImageMagickPath) {
-        this.ImageMagickPath = FilenameUtils.normalize(ImageMagickPath);
+    public void setImageMagickPath(String imageMagickPath) {
+        this.imageMagickPath = normalizeDirectoryPath("imageMagickPath", imageMagickPath);
+    }
+
+    /**
+     * Normalizes a directory path and makes sure a non-empty result ends with
+     * the platform file separator, so an executable name can be appended directly.
+     *
+     * @param name name of the setting, used in the error message
+     * @param path directory path as supplied by the caller
+     * @return the normalized path; empty if {@code path} is empty
+     * @throws IllegalArgumentException if {@code path} is null or cannot be normalized
+     */
+    private static String normalizeDirectoryPath(String name, String path) {
+        String normalized = FilenameUtils.normalize(path);
+        if (normalized == null) {
+            // FilenameUtils.normalize returns null for null input and for paths
+            // that climb above their root (e.g. "../foo")
+            throw new IllegalArgumentException("Invalid " + name + ": " + path);
+        }
+        if (!normalized.isEmpty() && !normalized.endsWith(File.separator)) {
+            normalized += File.separator;
+        }
+        return normalized;
     }
 
     /**
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 5bd2badba7..4d81023d33 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -152,7 +152,8 @@ public boolean hasTesseract(TesseractOCRConfig config) {
             TESSERACT_PRESENT.clear();
         }
         //check that the parent directory exists
-        if (! Files.isDirectory(Paths.get(config.getTesseractPath()))) {
+        if (! config.getTesseractPath().isEmpty() &&
+                ! Files.isDirectory(Paths.get(config.getTesseractPath()))) {
             TESSERACT_PRESENT.put(tesseract, false);
             return false;
         }
@@ -178,7 +179,8 @@ private boolean hasImageMagick(TesseractOCRConfig config) {
             IMAGE_MAGICK_PRESENT.clear();
         }
         //check that directory exists
-        if (! Files.isDirectory(Paths.get(config.getImageMagickPath()))) {
+        if (!config.getImageMagickPath().isEmpty() &&
+                ! Files.isDirectory(Paths.get(config.getImageMagickPath()))) {
             IMAGE_MAGICK_PRESENT.put(ImageMagick, false);
             return false;
         }
@@ -378,7 +380,7 @@ private void processImage(File scratchFile, TesseractOCRConfig config) throws IO
                 "-density", Integer.toString(config.getDensity()),
-                "-depth ", Integer.toString(config.getDepth()),
+                "-depth", Integer.toString(config.getDepth()),
                 "-colorspace", config.getColorspace(),
-                " -filter ", config.getFilter(),
+                "-filter", config.getFilter(),
                 "-resize", config.getResize() + "%",
                 "-rotate", angle,
                 scratchFile.getAbsolutePath(),
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
index 7517f7b1c2..a4bd515c5d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
@@ -219,7 +219,39 @@ public void testBogusPathCheck() {
         //allow path that doesn't actually exist
         TesseractOCRConfig config = new TesseractOCRConfig();
         config.setTesseractPath("blahdeblahblah");
-        assertEquals("blahdeblahblah", config.getTesseractPath());
+        assertEquals("blahdeblahblah"+File.separator, config.getTesseractPath());
+    }
+
+    @Test
+    public void testTrailingSlashInPathBehavior() {
+
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        config.setTesseractPath("blah");
+        assertEquals("blah"+File.separator, config.getTesseractPath());
+        config.setTesseractPath("blah"+File.separator);
+        assertEquals("blah"+File.separator, config.getTesseractPath());
+        config.setTesseractPath("");
+        assertEquals("", config.getTesseractPath());
+
+        config.setTessdataPath("blahdata");
+        assertEquals("blahdata"+File.separator, config.getTessdataPath());
+        config.setTessdataPath("blahdata"+File.separator);
+        assertEquals("blahdata"+File.separator, config.getTessdataPath());
+        config.setTessdataPath("");
+        assertEquals("", config.getTessdataPath());
+
+        config.setImageMagickPath("imagemagickpath");
+        assertEquals("imagemagickpath"+File.separator, config.getImageMagickPath());
+        config.setImageMagickPath("imagemagickpath"+File.separator);
+        assertEquals("imagemagickpath"+File.separator, config.getImageMagickPath());
+        config.setImageMagickPath("");
+        assertEquals("", config.getImageMagickPath());
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testUnnormalizablePathRejected() {
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        config.setTesseractPath("../blah");
     }
 
     @Test(expected=IllegalArgumentException.class)