Skip to content

Commit

Permalink
Merge branch 'release/33.x'
Browse files Browse the repository at this point in the history
* release/33.x:
  #5019 - SVGs embedded in MHTML files are not rendered
  #5017 - Encoding set in MHTML files is not respected
  #5015 - Update installation documentation for new initial password setting mechanism

% Conflicts:
%	inception/pom.xml
  • Loading branch information
reckart committed Aug 31, 2024
2 parents 7f11a9f + d86ed3f commit 4b3dbf6
Show file tree
Hide file tree
Showing 12 changed files with 11,061 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,12 @@ $ java -jar inception-app-standalone-x.xx.x.jar
In this case, no splash screen will appear.
Just go to http://localhost:8080/[_http://localhost:8080_] in your browser.

*Step 3 - Log in:* The first time you start the application, a default user with the *username _admin_* and the *password _admin_* is created.
Use this username and password to log in to the application.
*Step 3 - Log in:* The first time you start the application, you will be asked to set a password for the default **admin** user.
You need to enter this password into two separate fields.
The password is only accepted and saved if the same password has been entered into both fields.
After the password has been set, you will be redirected to the regular login screen where you can log in using the username **admin** and the password you have just set.

image::getting_started_login.png[align="center"]
image::getting_started_set_password.png[align="center"]

*You have finished the installation.*

Expand Down
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
package de.tudarmstadt.ukp.inception.externaleditor.xhtml;

import static java.lang.invoke.MethodHandles.lookup;
import static java.util.Optional.ofNullable;
import static org.slf4j.LoggerFactory.getLogger;
import static org.springframework.http.HttpStatus.OK;
import static org.springframework.http.MediaType.IMAGE_GIF;
import static org.springframework.http.MediaType.IMAGE_JPEG;
import static org.springframework.http.MediaType.IMAGE_PNG;

import java.io.FileNotFoundException;
import java.io.IOException;
Expand All @@ -30,6 +34,7 @@

import javax.xml.XMLConstants;

import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.dkpro.core.api.xml.type.XmlDocument;
import org.dkpro.core.api.xml.type.XmlElement;
Expand Down Expand Up @@ -70,6 +75,8 @@ public class XHtmlXmlDocumentViewControllerImpl
extends XmlDocumentViewControllerImplBase
implements XHtmlXmlDocumentViewController
{
private static final MediaType IMAGE_SVG = MediaType.parseMediaType("image/svg+xml");

private static final Logger LOG = getLogger(lookup().lookupClass());

private static final String GET_DOCUMENT_PATH = "/p/{projectId}/d/{documentId}/xml";
Expand Down Expand Up @@ -282,6 +289,9 @@ public ResponseEntity<InputStreamResource> getResource(
try {
var inputStream = formatSupport.openResourceStream(srcDocFile, aResourceId);
var httpHeaders = new HttpHeaders();

getContentType(aResourceId).ifPresent(httpHeaders::setContentType);

return new ResponseEntity<>(new InputStreamResource(inputStream), httpHeaders, OK);
}
catch (FileNotFoundException e) {
Expand All @@ -294,6 +304,24 @@ public ResponseEntity<InputStreamResource> getResource(
}
}

/**
 * Determines the media type to report for an embedded resource based on the suffix of its ID.
 * <p>
 * The suffix is the text after the last {@code .} in the resource ID. It is compared
 * case-insensitively, so {@code logo.PNG} and {@code logo.png} both map to the PNG media type.
 *
 * @param aResourceId
 *            the ID of the requested resource; may be {@code null}
 * @return the media type for a recognized image suffix (svg, png, gif, jpg, jpeg), or an empty
 *         optional if the ID is {@code null}, has no suffix, or the suffix is not recognized
 */
private Optional<MediaType> getContentType(String aResourceId)
{
    var suffix = StringUtils.substringAfterLast(aResourceId, ".");

    // substringAfterLast yields null for a null ID and "" when the ID contains no "."
    if (StringUtils.isEmpty(suffix)) {
        return Optional.empty();
    }

    // Fold case with Locale.ROOT so the match does not depend on the JVM default locale
    return ofNullable(switch (suffix.toLowerCase(java.util.Locale.ROOT)) {
    case "svg" -> IMAGE_SVG;
    case "png" -> IMAGE_PNG;
    case "gif" -> IMAGE_GIF;
    case "jpg", "jpeg" -> IMAGE_JPEG;
    default -> null;
    });
}

private ResponseEntity<String> toResponse(StringWriter aOut)
{
return ResponseEntity.ok() //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,7 @@ public void getNext(JCas aJCas) throws IOException, CollectionException
var message = builder.parseMessage(is);
var htmlDocument = getDocument(message);
try (var docIs = htmlDocument.getInputStream()) {
var charset = htmlDocument.getMimeCharset();
if ("US-ASCII".equals(charset)) {
// mime4j uses US_ASCII as default and we cannot override it. While it may be
// technically correct, e.g. Chrome seems to use UTF-8 by default but does not
// provide an encoding the MHTML files... *sigh*
charset = "UTF-8";
}
var doc = Jsoup.parse(docIs, charset, "");
var doc = Jsoup.parse(docIs, null, "");

var visitor = new CasXmlNodeVisitor(aJCas, normalizeWhitespace);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import static java.util.Arrays.asList;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
Expand Down Expand Up @@ -113,7 +115,10 @@ public InputStream openResourceStream(File aDocFile, String aResourcePath) throw
var builder = new DefaultMessageBuilder();
var message = builder.parseMessage(is);
var resourceBody = getResourcePartBody(message, aResourcePath);
return resourceBody.getInputStream();
try (var baos = new ByteArrayOutputStream()) {
resourceBody.getInputStream().transferTo(baos);
return new ByteArrayInputStream(baos.toByteArray());
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.io.html;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader;
import static org.apache.uima.fit.factory.JCasFactory.createJCas;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.contentOf;

import java.io.File;

import org.junit.jupiter.api.Test;

/**
 * Tests for {@link MHtmlDocumentReader} which compare the document text extracted from an MHTML
 * file against a reference text file stored next to it.
 */
class MHtmlDocumentReaderTest
{
    private static final String RESOURCE_DIR = "src/test/resources/mhtml/";

    @Test
    void testMHtmlWithUtf8Encoding() throws Exception
    {
        assertExtractedTextMatchesReference("Exceptional_isomorphism_Wikipedia");
    }

    @Test
    void testMHtmlWithWindows1252Encoding() throws Exception
    {
        assertExtractedTextMatchesReference("A_KAE");
    }

    /**
     * Reads {@code <aBaseName>.mhtml} from the test resources and asserts that the resulting
     * document text equals the contents of {@code <aBaseName>.txt} decoded as UTF-8.
     *
     * @param aBaseName
     *            file name (without extension) shared by the MHTML input and the reference text
     * @throws Exception
     *             if the CAS or reader cannot be created or the document cannot be read
     */
    private static void assertExtractedTextMatchesReference(String aBaseName) throws Exception
    {
        var jcas = createJCas();

        var reader = createReader( //
                MHtmlDocumentReader.class, //
                MHtmlDocumentReader.PARAM_SOURCE_LOCATION, //
                RESOURCE_DIR + aBaseName + ".mhtml");

        reader.getNext(jcas.getCas());

        var referenceFile = new File(RESOURCE_DIR + aBaseName + ".txt");
        assertThat(jcas.getDocumentText()).isEqualTo(contentOf(referenceFile, UTF_8));
    }
}
Loading

0 comments on commit 4b3dbf6

Please sign in to comment.