diff --git a/bom/runtime/pom.xml b/bom/runtime/pom.xml index c78d51dceb556..6e19dfda82270 100644 --- a/bom/runtime/pom.xml +++ b/bom/runtime/pom.xml @@ -182,6 +182,8 @@ 1.1.1 2.5.2 2.2.0 + 1.4 + 3.0.2 @@ -1126,6 +1128,10 @@ org.apache.cxf cxf-rt-rs-client + + org.apache.poi + poi-ooxml-schemas + ${tika.version} @@ -2616,6 +2622,12 @@ org.kie.kogito drools-decisiontables ${kogito.version} + + + org.apache.poi + poi-ooxml + + org.kie.kogito @@ -2762,6 +2774,25 @@ quarkus-banner ${project.version} + + + + org.apache.poi + ooxml-schemas + ${ooxml-schemas.version} + + + org.apache.xmlbeans + xmlbeans + + + + + org.apache.xmlbeans + xmlbeans + ${xmlbeans.version} + + diff --git a/core/deployment/src/main/java/io/quarkus/deployment/util/ReflectUtil.java b/core/deployment/src/main/java/io/quarkus/deployment/util/ReflectUtil.java index 3381cc95ebc7b..f803cc0a3da2c 100644 --- a/core/deployment/src/main/java/io/quarkus/deployment/util/ReflectUtil.java +++ b/core/deployment/src/main/java/io/quarkus/deployment/util/ReflectUtil.java @@ -1,15 +1,6 @@ package io.quarkus.deployment.util; -import java.lang.reflect.AnnotatedElement; -import java.lang.reflect.Array; -import java.lang.reflect.Field; -import java.lang.reflect.GenericArrayType; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Member; -import java.lang.reflect.Parameter; -import java.lang.reflect.ParameterizedType; -import java.lang.reflect.Type; -import java.lang.reflect.UndeclaredThrowableException; +import java.lang.reflect.*; import java.util.Arrays; import java.util.List; import java.util.Optional; diff --git a/extensions/arc/runtime/pom.xml b/extensions/arc/runtime/pom.xml index 8f0d2a3bc607d..7fb3f9081a5b9 100644 --- a/extensions/arc/runtime/pom.xml +++ b/extensions/arc/runtime/pom.xml @@ -27,6 +27,17 @@ org.eclipse.microprofile.context-propagation microprofile-context-propagation-api + + xalan + xalan + + + xml-apis + xml-apis + + + 2.7.2 + diff --git 
a/extensions/tika/deployment/pom.xml b/extensions/tika/deployment/pom.xml index 67b9532939771..6d1c1c4fa6ccf 100644 --- a/extensions/tika/deployment/pom.xml +++ b/extensions/tika/deployment/pom.xml @@ -12,6 +12,10 @@ quarkus-tika-deployment Quarkus - Apache Tika - Deployment + + 0.9.12 + + io.quarkus @@ -25,6 +29,24 @@ io.quarkus quarkus-tika + + + org.reflections + reflections + ${reflections.version} + + + + xalan + xalan + + + xml-apis + xml-apis + + + 2.7.2 + io.quarkus quarkus-junit5-internal diff --git a/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java index 2d44db53de4bf..632bdfb6f1471 100644 --- a/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java +++ b/extensions/tika/deployment/src/main/java/io/quarkus/tika/deployment/TikaProcessor.java @@ -1,21 +1,32 @@ package io.quarkus.tika.deployment; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.TransformerFactory; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.poi.ooxml.POIXMLDocumentPart; +import org.apache.poi.xslf.usermodel.XSLFTheme; +import org.apache.poi.xwpf.usermodel.XWPFSettings; +import org.apache.poi.xwpf.usermodel.XWPFStyles; import org.apache.tika.detect.Detector; import org.apache.tika.detect.EncodingDetector; import org.apache.tika.parser.Parser; +import org.apache.xerces.parsers.XIncludeAwareParserConfiguration; +import 
org.apache.xerces.xni.parser.XMLParserConfiguration; +import org.apache.xmlbeans.XmlObject; +import org.openxmlformats.schemas.drawingml.x2006.main.impl.ThemeDocumentImpl; +import org.openxmlformats.schemas.presentationml.x2006.main.STPlaceholderType; +import org.openxmlformats.schemas.presentationml.x2006.main.impl.PresentationDocumentImpl; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CalcChainDocumentImpl; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.DocumentDocumentImpl; import io.quarkus.arc.deployment.AdditionalBeanBuildItem; import io.quarkus.arc.deployment.BeanContainerBuildItem; @@ -26,9 +37,7 @@ import io.quarkus.deployment.annotations.Record; import io.quarkus.deployment.builditem.CapabilityBuildItem; import io.quarkus.deployment.builditem.FeatureBuildItem; -import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceBuildItem; -import io.quarkus.deployment.builditem.nativeimage.RuntimeInitializedClassBuildItem; -import io.quarkus.deployment.builditem.nativeimage.ServiceProviderBuildItem; +import io.quarkus.deployment.builditem.nativeimage.*; import io.quarkus.deployment.util.ServiceUtil; import io.quarkus.tika.TikaParseException; import io.quarkus.tika.runtime.TikaConfiguration; @@ -46,9 +55,13 @@ public class TikaProcessor { "org.apache.tika.parser.geo.topic.GeoParser" }).collect(Collectors.toSet()); + public static final String PDF_PARSER_NAME = "pdf"; + public static final String OOXML_PARSER_NAME = "ooxml"; + private static final Map PARSER_ABBREVIATIONS = Arrays.stream(new String[][] { { "pdf", "org.apache.tika.parser.pdf.PDFParser" }, - { "odf", "org.apache.tika.parser.odf.OpenDocumentParser" } + { "odf", "org.apache.tika.parser.odf.OpenDocumentParser" }, + { OOXML_PARSER_NAME, "org.apache.tika.parser.microsoft.ooxml.OOXMLParser" }, }).collect(Collectors.toMap(kv -> kv[0], kv -> kv[1])); @BuildStep @@ -66,12 +79,6 @@ FeatureBuildItem feature() { return new FeatureBuildItem(FeatureBuildItem.TIKA); 
} - @BuildStep - public void registerRuntimeInitializedClasses(BuildProducer resource) { - //org.apache.tika.parser.pdf.PDFParser (https://issues.apache.org/jira/browse/PDFBOX-4548) - resource.produce(new RuntimeInitializedClassBuildItem("org.apache.pdfbox.pdmodel.font.PDType1Font")); - } - @BuildStep public void registerTikaCoreResources(BuildProducer resource) { resource.produce(new NativeImageResourceBuildItem("org/apache/tika/mime/tika-mimetypes.xml")); @@ -79,21 +86,141 @@ public void registerTikaCoreResources(BuildProducer resource) { - resource.produce(new NativeImageResourceBuildItem("org/apache/tika/parser/pdf/PDFParser.properties")); + public void registerPdfParser(TikaConfiguration configuration, + BuildProducer runtimeProducer, + BuildProducer nativeImageResourceProducer) { + if (doNotIncludeParser(configuration, PDF_PARSER_NAME)) { + return; + } + + //org.apache.tika.parser.pdf.PDFParser (https://issues.apache.org/jira/browse/PDFBOX-4548) + runtimeProducer.produce(new RuntimeInitializedClassBuildItem("org.apache.pdfbox.pdmodel.font.PDType1Font")); + + nativeImageResourceProducer + .produce(new NativeImageResourceBuildItem("org/apache/tika/parser/pdf/PDFParser.properties")); + + nativeImageResourceProducer + .produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/additional.txt")); + nativeImageResourceProducer + .produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/glyphlist.txt")); + nativeImageResourceProducer + .produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt")); } @BuildStep - public void registerPdfBoxResources(BuildProducer resource) { - resource.produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/additional.txt")); - resource.produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/glyphlist.txt")); - resource.produce(new 
NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt")); + public void registerOOXMLParser(TikaConfiguration configuration, + BuildProducer resource, + BuildProducer serviceProvider, + BuildProducer resourceBundle, + BuildProducer resourceDirectory, + BuildProducer resourceBuildItem) throws Exception { + //https://github.com/quarkusio/quarkus/issues/6549 + + if (doNotIncludeParser(configuration, OOXML_PARSER_NAME)) { + return; + } + + Map> fileFormatSupport = configuration.parserFileFormatSupport; + + boolean docxSupport = getOOXMLFileFormatSupport(fileFormatSupport, "docx", true); + boolean pptxSupport = getOOXMLFileFormatSupport(fileFormatSupport, "pptx", false); + boolean xlsxSupport = getOOXMLFileFormatSupport(fileFormatSupport, "xlsx", false); + + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xerces.impl.dv.dtd.DTDDVFactoryImpl")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xerces.impl.msg.XMLMessageFormatter")); + + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.poi.POIXMLTextExtractor")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.poi.openxml4j.opc.ZipPackagePart")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.poi.openxml4j.opc.PackagePart")); + + if (docxSupport) { + resource.produce(new ReflectiveClassBuildItem(true, true, true, XWPFSettings.class.getName())); + resource.produce(new ReflectiveClassBuildItem(true, true, true, XWPFStyles.class.getName())); + } + + if (pptxSupport) { + resource.produce(new ReflectiveClassBuildItem(true, true, true, STPlaceholderType.Enum.class.getName())); + getAllClassesFromPackage(XSLFTheme.class.getPackage().getName(), POIXMLDocumentPart.class) + .forEach(aClass -> resource.produce( + new ReflectiveClassBuildItem(true, true, true, aClass))); + } + + resource.produce(new ReflectiveClassBuildItem(true, true, true, POIXMLDocumentPart.class.getName())); + + 
resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.values.XmlComplexContentImpl")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.schema.SchemaTypeLoaderImpl")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.schema.SchemaTypeImpl")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.schema.SchemaTypeSystemImpl")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Cursor")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Xobj")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Xobj.AttrXobj")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Xobj.ElementXobj")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Xobj.DocumentXobj")); + resource.produce(new ReflectiveClassBuildItem(true, true, "org.apache.xmlbeans.impl.store.Locale")); + + resource.produce(new ReflectiveClassBuildItem(true, true, + "schemaorg_apache_xmlbeans.system.sD023D6490046BA0250A839A9AD24C443.TypeSystemHolder")); + + if (pptxSupport) { + getAllClassesFromPackage(ThemeDocumentImpl.class.getPackage().getName(), XmlObject.class) + .forEach(aClass -> resource.produce( + new ReflectiveClassBuildItem(true, true, true, aClass))); + getAllClassesFromPackage( + PresentationDocumentImpl.class.getPackage().getName(), + XmlObject.class) + .forEach(aClass -> resource.produce( + new ReflectiveClassBuildItem(true, true, true, aClass))); + } + + if (xlsxSupport) { + getAllClassesFromPackage(CalcChainDocumentImpl.class.getPackage().getName(), XmlObject.class) + .forEach(aClass -> resource.produce( + new ReflectiveClassBuildItem(true, true, true, aClass))); + } + + if (docxSupport) { + 
getAllClassesFromPackage(DocumentDocumentImpl.class.getPackage().getName(), XmlObject.class) + .forEach(aClass -> resource.produce( + new ReflectiveClassBuildItem(true, true, true, aClass))); + } + + serviceProvider.produce( + new ServiceProviderBuildItem(XMLParserConfiguration.class.getName(), + Arrays.asList(XIncludeAwareParserConfiguration.class.getName()))); + serviceProvider.produce( + new ServiceProviderBuildItem(SAXParserFactory.class.getName(), + getProviderNames(SAXParserFactory.class.getName()))); + serviceProvider.produce( + new ServiceProviderBuildItem(TransformerFactory.class.getName(), + getProviderNames(TransformerFactory.class.getName()))); + + resourceBundle.produce(new NativeImageResourceBundleBuildItem("org.apache.xerces.impl.msg.SAXMessages")); + + resourceDirectory.produce(new NativeImageResourceDirectoryBuildItem( + "schemaorg_apache_xmlbeans/system/sD023D6490046BA0250A839A9AD24C443")); + resourceBuildItem.produce(new NativeImageResourceBuildItem("org/apache/xalan/res/XSLTInfo.properties")); + resourceBuildItem.produce(new NativeImageResourceBuildItem("org/apache/xalan/internal/res/XSLTInfo.properties")); + } + + private Boolean getOOXMLFileFormatSupport(Map> fileFormatSupport, String fileFormat, + boolean defaultValue) { + return Optional.ofNullable(fileFormatSupport.get(OOXML_PARSER_NAME)) + .filter(map -> map.containsKey(fileFormat)) + .map(map -> map.get(fileFormat)) + .map(Boolean::parseBoolean) + .orElse(defaultValue); + } + + private boolean doNotIncludeParser(TikaConfiguration configuration, String parserName) { + return configuration.parsers.isPresent() + && !getParserAbbreviations(configuration.parsers).contains(parserName); } @BuildStep @Record(ExecutionTime.STATIC_INIT) void initializeTikaParser(BeanContainerBuildItem beanContainer, TikaRecorder recorder, - BuildProducer serviceProvider, TikaConfiguration configuration) + BuildProducer serviceProvider, + TikaConfiguration configuration) throws Exception { Map> parsers = 
getSupportedParserConfig(configuration.tikaConfigPath, configuration.parsers, @@ -124,8 +251,7 @@ public static Map> getSupportedParserConfig(Op return providerNames.stream().filter(pred).collect(Collectors.toMap(Function.identity(), p -> Collections. emptyList())); } else { - List abbreviations = Arrays.stream(requiredParsers.get().split(",")).map(s -> s.trim()) - .collect(Collectors.toList()); + List abbreviations = getParserAbbreviations(requiredParsers); Map fullNamesAndAbbreviations = abbreviations.stream() .collect(Collectors.toMap(p -> getParserNameFromConfig(p, parserAbbreviations), Function.identity())); return providerNames.stream().filter(pred).filter(p -> fullNamesAndAbbreviations.containsKey(p)) @@ -134,6 +260,24 @@ public static Map> getSupportedParserConfig(Op } } + public static Stream getAllClassesFromPackage(String packageName, Class... baseClasses) { + if (StringUtils.isBlank(packageName) || ArrayUtils.isEmpty(baseClasses)) { + return new ArrayList().stream(); + } + + org.reflections.Reflections reflections = new org.reflections.Reflections(packageName); + + return (Stream) Arrays.stream(baseClasses) + .flatMap(aClass -> reflections.getSubTypesOf(aClass).stream()) + .filter(aClass -> !((Class) aClass).isInterface()) + .filter(aClass -> ((Class) aClass).getPackage().getName().equals(packageName)); + } + + private static List getParserAbbreviations(Optional requiredParsers) { + return Arrays.stream(requiredParsers.get().split(",")).map(s -> s.trim()) + .collect(Collectors.toList()); + } + private static String generateTikaXmlConfiguration(Map> parserConfig) { StringBuilder tikaXmlConfigurationBuilder = new StringBuilder(); tikaXmlConfigurationBuilder.append(""); diff --git a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java b/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java index 133e9e08217e2..cd62b805f4ef3 100644 --- 
a/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java +++ b/extensions/tika/deployment/src/test/java/io/quarkus/tika/deployment/TikaProcessorTest.java @@ -1,17 +1,14 @@ package io.quarkus.tika.deployment; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; +import java.util.stream.Collectors; +import org.apache.xmlbeans.XmlObject; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.DocumentDocumentImpl; import io.quarkus.test.QuarkusUnitTest; @@ -93,6 +90,14 @@ public void testUnhyphenation() { assertEquals("position", TikaProcessor.unhyphenate("position")); } + @Test + public void testReflection() { + List types = TikaProcessor + .getAllClassesFromPackage(DocumentDocumentImpl.class.getPackage().getName(), XmlObject.class) + .collect(Collectors.toList()); + assertTrue(types.size() > 0); + } + private Set getParserNames(String tikaConfigPath, String parsers) throws Exception { return TikaProcessor.getSupportedParserConfig( Optional.ofNullable(tikaConfigPath), Optional.ofNullable(parsers), @@ -106,4 +111,5 @@ private Map> getParserConfig(Str Optional.ofNullable(tikaConfigPath), Optional.ofNullable(parsers), parserParamMaps, parserAbbreviations); } + } diff --git a/extensions/tika/runtime/pom.xml b/extensions/tika/runtime/pom.xml index 3add5452064bc..5e823960377fe 100644 --- a/extensions/tika/runtime/pom.xml +++ b/extensions/tika/runtime/pom.xml @@ -39,6 +39,20 @@ + + org.apache.poi + ooxml-schemas + + + org.apache.xmlbeans + xmlbeans + + + + + org.apache.xmlbeans + xmlbeans + jakarta.annotation 
jakarta.annotation-api diff --git a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java index 1069b37faad10..34639a084b3ed 100644 --- a/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java +++ b/extensions/tika/runtime/src/main/java/io/quarkus/tika/runtime/TikaConfiguration.java @@ -54,6 +54,22 @@ public class TikaConfiguration { @ConfigItem public Map> parserOptions; + /** + * Configuration of supporting file format for the individual parsers. + * Currently is used only for ooxml parser, because the higher number of + * supported formats there leads to OOM error during the native build. + * + * Example: + * + *
+     * quarkus.tika.parsers = ooxml
+     * quarkus.tika.parser-file-format-support.ooxml.docx = true
+     * quarkus.tika.parser-file-format-support.ooxml.pptx = false
+     * quarkus.tika.parser-file-format-support.ooxml.xlsx = true
+     */
+    @ConfigItem
+    public Map<String, Map<String, String>> parserFileFormatSupport;
+
     /**
      * Full parser class name for a given parser abbreviation.
      * For example:
diff --git a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
index 995ecbb18e524..e85313ad0dee5 100644
--- a/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
+++ b/integration-tests/tika/src/main/java/io/quarkus/it/tika/TikaEmdeddedContentResource.java
@@ -6,14 +6,17 @@
 import javax.ws.rs.POST;
 import javax.ws.rs.Path;
 import javax.ws.rs.Produces;
+import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MediaType;
 
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
 import org.apache.tika.parser.pdf.PDFParser;
 
 import io.quarkus.tika.TikaContent;
+import io.quarkus.tika.TikaMetadata;
 import io.quarkus.tika.TikaParser;
 
 @Path("/embedded")
@@ -21,7 +24,7 @@ public class TikaEmdeddedContentResource {
 
     // Avoiding the injection, otherwise the recorded tika-config.xml intended for TikaPdfInvoiceTest is used
     TikaParser parser = new TikaParser(new RecursiveParserWrapper(
-            new AutoDetectParser(new OfficeParser(), new PDFParser()), true), false);
+            new AutoDetectParser(new OfficeParser(), new PDFParser(), new OOXMLParser()), true), false);
 
     @POST
     @Path("/outerText")
@@ -40,4 +43,12 @@ public String extractInnerText(InputStream stream) {
         TikaContent content = parser.parse(stream);
         return content.getEmbeddedContent().get(0).getText();
     }
+
+    @POST
+    @Path("/contentType")
+    @Produces(MediaType.TEXT_PLAIN)
+    public String contentType(InputStream stream) {
+        final TikaMetadata extracted = parser.getMetadata(stream); // metadata of the posted document
+        return extracted.getSingleValue(HttpHeaders.CONTENT_TYPE); // its single Content-Type value
+    }
 }
diff --git a/integration-tests/tika/src/main/resources/application.properties b/integration-tests/tika/src/main/resources/application.properties
index e93410248ccb4..17b19ac77d34c 100644
--- a/integration-tests/tika/src/main/resources/application.properties
+++ b/integration-tests/tika/src/main/resources/application.properties
@@ -1,2 +1,5 @@
-quarkus.tika.parsers=pdf
+quarkus.tika.parsers=pdf,ooxml
 quarkus.tika.parser-options.pdf.sort-by-position=true
+quarkus.tika.parser-file-format-support.ooxml.docx=true
+quarkus.tika.parser-file-format-support.ooxml.pptx=false
+quarkus.tika.parser-file-format-support.ooxml.xlsx=false
diff --git a/integration-tests/tika/src/main/resources/testDOCX_embedded.docx b/integration-tests/tika/src/main/resources/testDOCX_embedded.docx
new file mode 100644
index 0000000000000..cb386bde4aaeb
Binary files /dev/null and b/integration-tests/tika/src/main/resources/testDOCX_embedded.docx differ
diff --git a/integration-tests/tika/src/main/resources/testPPTX_embedded.pptx b/integration-tests/tika/src/main/resources/testPPTX_embedded.pptx
new file mode 100644
index 0000000000000..f116cca833b2a
Binary files /dev/null and b/integration-tests/tika/src/main/resources/testPPTX_embedded.pptx differ
diff --git a/integration-tests/tika/src/main/resources/testXLSX_embedded.xlsx b/integration-tests/tika/src/main/resources/testXLSX_embedded.xlsx
new file mode 100644
index 0000000000000..32f3b24dde28e
Binary files /dev/null and b/integration-tests/tika/src/main/resources/testXLSX_embedded.xlsx differ
diff --git a/integration-tests/tika/src/test/java/io/quarkus/it/tika/TikaEmbeddedContentTest.java b/integration-tests/tika/src/test/java/io/quarkus/it/tika/TikaEmbeddedContentTest.java
index 96df69f9577ab..29e454617d2fe 100644
--- a/integration-tests/tika/src/test/java/io/quarkus/it/tika/TikaEmbeddedContentTest.java
+++ b/integration-tests/tika/src/test/java/io/quarkus/it/tika/TikaEmbeddedContentTest.java
@@ -5,7 +5,11 @@
 
 import java.io.ByteArrayOutputStream;
 import java.io.InputStream;
+import java.util.Optional;
 
+import javax.inject.Inject;
+
+import org.eclipse.microprofile.config.inject.ConfigProperty;
 import org.junit.jupiter.api.Test;
 
 import io.quarkus.test.junit.QuarkusTest;
@@ -13,6 +17,13 @@
 @QuarkusTest
 public class TikaEmbeddedContentTest {
 
+    @Inject
+    @ConfigProperty(name = "quarkus.tika.parser-file-format-support.ooxml.pptx")
+    Optional<Boolean> parsePptx; // pptx switch; the tests treat an absent value as "enabled"
+    @Inject
+    @ConfigProperty(name = "quarkus.tika.parser-file-format-support.ooxml.xlsx")
+    Optional<Boolean> parseXlsx; // xlsx switch; the tests treat an absent value as "enabled"
+
     @Test
     public void testGetOuterText() throws Exception {
         given()
@@ -35,6 +46,44 @@ public void testGetInnerText() throws Exception {
                 .body(containsString("The quick brown fox jumps over the lazy dog"));
     }
 
+    @Test
+    public void contentTypePPTXText() throws Exception {
+        // Aborts (reported as skipped, not as a silent pass) unless PPTX support is switched on;
+        // the null check keeps the protection the old `true ||` short-circuit gave an unset field.
+        org.junit.jupiter.api.Assumptions.assumeTrue(parsePptx != null && parsePptx.orElse(true));
+        contentTypeText("testPPTX_embedded.pptx",
+                "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+    }
+
+    @Test
+    public void contentTypeXLSXText() throws Exception {
+        // Aborts (reported as skipped, not as a silent pass) unless XLSX support is switched on;
+        // the null check keeps the protection the old `true ||` short-circuit gave an unset field.
+        org.junit.jupiter.api.Assumptions.assumeTrue(parseXlsx != null && parseXlsx.orElse(true));
+        contentTypeText("testXLSX_embedded.xlsx",
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    }
+
+    @Test
+    public void contentTypeDOCXText() throws Exception { // unguarded: docx support is enabled in application.properties
+        contentTypeText("testDOCX_embedded.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+    }
+
+    @Test
+    public void contentTypeEXCELText() throws Exception { // legacy .xls: not one of the ooxml format switches
+        contentTypeText("testEXCEL_embeded.xls", "application/vnd.ms-excel");
+    }
+
+    private void contentTypeText(String fileName, String expected) throws Exception {
+        // POSTs the bytes of the named test resource to the content-type endpoint and
+        // requires HTTP 200 plus a response body that contains the expected media type.
+        final byte[] payload = readTestFile(fileName);
+        given().when().body(payload)
+                .post("/embedded/contentType")
+                .then().statusCode(200)
+                .body(containsString(expected));
+    }
+
     private byte[] readTestFile(String fileName) throws Exception {
         try (InputStream is = getClass().getClassLoader().getResourceAsStream(fileName)) {
             return readBytes(is);