Skip to content

Commit

Permalink
Merge pull request #800 from btut/feature/bibtexHeaderAndFulltext
Browse files Browse the repository at this point in the history
Accept application/x-bibtex for processHeaderDocument
  • Loading branch information
kermitt2 authored Aug 1, 2021
2 parents e4ca196 + 2a69fa4 commit 1a6b103
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 13 deletions.
22 changes: 16 additions & 6 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -1922,12 +1922,19 @@ public String toBibTeX(String id, GrobidAnalysisConfig config) {
fullAuthors.stream()
.filter(person -> person != null)
.forEachOrdered(person -> {
String author = person.getLastName();
String author = "";
if (person.getLastName() != null) {
author = person.getLastName();
}
if (person.getFirstName() != null) {
author += ", ";
if (author.length() > 0) {
author += ", ";
}
author += person.getFirstName();
}
authors.add(author);
if (author.length() > 0 ) {
authors.add(author);
}
});
} else if (this.authors != null) {
StringTokenizer st = new StringTokenizer(this.authors, ";");
Expand Down Expand Up @@ -1973,9 +1980,12 @@ public String toBibTeX(String id, GrobidAnalysisConfig config) {
}
// fullEditors has to be used instead

// year
if (publication_date != null) {
bibtex.add(" year = {" + publication_date + "}");
// dates
if (normalized_publication_date != null) {
String isoDate = TEIFormatter.toISOString(normalized_publication_date);
if (isoDate != null) {
bibtex.add(" date = {" + isoDate + "}");
}
}

// address
Expand Down
2 changes: 1 addition & 1 deletion grobid-core/src/main/java/org/grobid/core/data/Date.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,4 +211,4 @@ public String toXML() {
return theDate;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,26 +152,54 @@ public Response getAdmin_htmlGet(@QueryParam(SHA1) String sha1) {
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_XML)
@POST
public Response processHeaderDocument_post(
public Response processHeaderDocumentReturnXml_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
int consol = validateConsolidationParam(consolidate);
return restProcessFiles.processStatelessHeaderDocument(
inputStream, consol,
validateIncludeRawParam(includeRawAffiliations)
validateIncludeRawParam(includeRawAffiliations),
ExpectedResponseType.XML
);
}

@Path(PATH_HEADER)
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_XML)
@PUT
public Response processStatelessHeaderDocument(
public Response processStatelessHeaderDocumentReturnXml(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
return processHeaderDocumentReturnXml_post(inputStream, consolidate, includeRawAffiliations);
}

/**
 * Handles POST requests on {@code PATH_HEADER} whose response is declared as
 * {@code BibTexMediaType.MEDIA_TYPE}.
 *
 * <p>Both textual form parameters are validated first, then the call is handed to
 * {@code restProcessFiles.processStatelessHeaderDocument} with
 * {@code ExpectedResponseType.BIBTEX}.
 *
 * @param inputStream            content of the {@code INPUT} form part, forwarded unchanged
 * @param consolidate            raw {@code CONSOLIDATE_HEADER} form value (defaults to {@code "0"}),
 *                               converted by {@code validateConsolidationParam}
 * @param includeRawAffiliations raw {@code INCLUDE_RAW_AFFILIATIONS} form value (defaults to
 *                               {@code "0"}), converted by {@code validateIncludeRawParam}
 * @return the response built by {@code restProcessFiles.processStatelessHeaderDocument}
 */
@Path(PATH_HEADER)
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(BibTexMediaType.MEDIA_TYPE)
@POST
public Response processHeaderDocumentReturnBibTeX_post(
    @FormDataParam(INPUT) InputStream inputStream,
    @DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
    @DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
    // Validate in the same order as before: consolidation first, raw-affiliation flag second.
    final int consolidationLevel = validateConsolidationParam(consolidate);
    final boolean withRawAffiliations = validateIncludeRawParam(includeRawAffiliations);

    return restProcessFiles.processStatelessHeaderDocument(
        inputStream,
        consolidationLevel,
        withRawAffiliations,
        ExpectedResponseType.BIBTEX);
}

@Path(PATH_HEADER)
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(BibTexMediaType.MEDIA_TYPE)
@PUT
public Response processStatelessHeaderDocumentReturnBibTeX(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
return processHeaderDocument_post(inputStream, consolidate, includeRawAffiliations);
return processHeaderDocumentReturnBibTeX_post(inputStream, consolidate, includeRawAffiliations);
}

@Path(PATH_FULL_TEXT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.google.inject.Singleton;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.PatentItem;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentSource;
Expand Down Expand Up @@ -62,7 +63,8 @@ public GrobidRestProcessFiles() {
public Response processStatelessHeaderDocument(
final InputStream inputStream,
final int consolidate,
final boolean includeRawAffiliations
final boolean includeRawAffiliations,
ExpectedResponseType expectedResponseType
) {
LOGGER.debug(methodLogIn());
String retVal = null;
Expand Down Expand Up @@ -91,17 +93,24 @@ public Response processStatelessHeaderDocument(

String md5Str = DatatypeConverter.printHexBinary(digest).toUpperCase();

BiblioItem result = new BiblioItem();

// starts conversion process
retVal = engine.processHeader(
originFile.getAbsolutePath(),
md5Str,
consolidate,
includeRawAffiliations,
null
result
);

if (GrobidRestUtils.isResultNullOrEmpty(retVal)) {
response = Response.status(Response.Status.NO_CONTENT).build();
} else if (expectedResponseType == ExpectedResponseType.BIBTEX) {
response = Response.status(Response.Status.OK)
.entity(result.toBibTeX("-1"))
.header(HttpHeaders.CONTENT_TYPE, BibTexMediaType.MEDIA_TYPE + "; charset=UTF-8")
.build();
} else {
response = Response.status(Response.Status.OK)
.entity(retVal)
Expand Down

0 comments on commit 1a6b103

Please sign in to comment.