Skip to content

Commit

Permalink
use same copy behavior as for journal abbrevs
Browse files Browse the repository at this point in the history
  • Loading branch information
Siedlerchr committed Nov 21, 2023
1 parent 1dfa35b commit 8462464
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 36 deletions.
47 changes: 23 additions & 24 deletions src/main/java/org/jabref/logic/journals/PredatoryJournalLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
Expand All @@ -20,10 +19,8 @@
import java.util.regex.Pattern;

import org.jabref.logic.net.URLDownload;
import org.jabref.logic.util.OS;
import org.jabref.model.strings.StringUtil;

import net.harawata.appdirs.AppDirsFactory;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
Expand Down Expand Up @@ -78,34 +75,29 @@ private static class PJSource {
public static PredatoryJournalRepository loadRepository() {
PredatoryJournalRepository repository = new PredatoryJournalRepository();

// Initialize with built-in list
try (InputStream resourceAsStream = PredatoryJournalRepository.class.getResourceAsStream("/journals/predatoryJournal-list.mv")) {
if (resourceAsStream == null) {
LOGGER.warn("There is no predatoryJournalList.mv. We use a default predatory journal list");
} else {
// Use user's app data directory for more permanent storage
Path appDataDir = Path.of(AppDirsFactory.getInstance()
.getUserDataDir(
OS.APP_DIR_APP_NAME,
"predatoryJournals",
OS.APP_DIR_APP_AUTHOR));
Files.createDirectories(appDataDir); // Ensure the directory exists
Path predatoryJournalListPath = appDataDir.resolve("predatoryJournal-list.mv");
Files.copy(resourceAsStream, predatoryJournalListPath, StandardCopyOption.REPLACE_EXISTING);
repository = new PredatoryJournalRepository(predatoryJournalListPath);
Path tempDir = Files.createTempDirectory("jabref-journal");
Path tempJournalList = tempDir.resolve("predatoryJournal-list.mv");
Files.copy(resourceAsStream, tempJournalList);
repository = new PredatoryJournalRepository(tempJournalList);
tempDir.toFile().deleteOnExit();
tempJournalList.toFile().deleteOnExit();
}
} catch (
IOException e) {
} catch (IOException e) {
LOGGER.error("Error while copying predatory journal list", e);
return repository;
}
return repository;
}

/**
* Loads predatory journal information from online resources
*/
public void loadFromOnlineSources() {
// populates linkElements (and predatoryJournals if CSV)
PREDATORY_SOURCES.forEach(this::crawl);
// adds cleaned HTML to predatoryJournals
LINK_ELEMENTS.forEach(this::clean);

LOGGER.info("Updated predatory journal list");
Expand All @@ -116,17 +108,16 @@ private void crawl(PJSource source) {
URLDownload download = new URLDownload(source.url);

if (!download.canBeReached()) {
LOGGER.warn("URL UNREACHABLE");
LOGGER.warn("Url {} is unreachable", source.url);
} else if (source.url.getPath().contains(".csv")) {
handleCSV(new InputStreamReader(download.asInputStream()));
} else {
if (source.elementPattern.isPresent()) {
handleHTML(source.elementPattern.get(), download.asString());
}
}
} catch (
IOException ex) {
LOGGER.error("Could not crawl source {}", source.url, ex);
} catch (IOException ex) {
LOGGER.error("Could not crawl source for predatory journals {}", source.url, ex);
}
}

Expand All @@ -147,7 +138,7 @@ private void handleCSV(Reader reader) throws IOException {
}
}
// changes column order from CSV (source: url, name, abbr)
predatoryJournalInformations.add(new PredatoryJournalInformation(name, abbr, url));
predatoryJournalInformations.add(new PredatoryJournalInformation(decode(name), decode(abbr), url));
}
}

Expand Down Expand Up @@ -180,10 +171,18 @@ private void clean(String item) {
return;
}
}
predatoryJournalInformations.add(new PredatoryJournalInformation(name, abbr, url));
predatoryJournalInformations.add(new PredatoryJournalInformation(decode(name), decode(abbr), url));
}
}

/**
 * Normalizes a journal name or abbreviation before it is stored.
 * <p>
 * A {@code null} input is treated as the empty string. Commas are removed, and the HTML
 * character references {@code &amp;amp;}, {@code &amp;#8217;} (right single quotation mark)
 * and {@code &amp;#8211;} (en dash) are replaced by the plain ASCII characters
 * {@code &}, {@code '} and {@code -}.
 *
 * @param s the raw text, may be {@code null}
 * @return the normalized text, never {@code null}
 */
private String decode(String s) {
    return Optional.ofNullable(s)
                   .orElse("")
                   .replace(",", "")
                   // The search strings must be the *encoded* entities; replacing "&" by "&" is a no-op.
                   .replace("&amp;", "&")
                   .replace("&#8217;", "'")
                   .replace("&#8211;", "-");
}
/**
 * Returns the predatory journal entries collected so far.
 *
 * @return a newly created {@link HashSet} filled from the loader's internal collection;
 *         changes to the returned set do not affect that collection
 */
public Set<PredatoryJournalInformation> getPredatoryJournalInformations() {
    Set<PredatoryJournalInformation> snapshot = new HashSet<>(predatoryJournalInformations);
    return snapshot;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -48,24 +47,15 @@ public boolean isKnownName(String journalName) {
String journal = journalName.trim().replaceAll(Matcher.quoteReplacement("\\&"), "&");

if (predatoryJournals.containsKey(journal)) {
LOGGER.info("match: " + journal);
LOGGER.debug("Found predatory journal {}", journal);
return true;
}

var matches = predatoryJournals.keySet().stream()
.filter(key -> match.isSimilar(journal.toLowerCase(Locale.ROOT), key.toLowerCase(Locale.ROOT)))
.collect(Collectors.toList());

LOGGER.info("matches: " + String.join(", ", matches));
LOGGER.info("Found multiple possible predatory journals {}", String.join(", ", matches));
return !matches.isEmpty();
}

private String decode(String s) {
return Optional.ofNullable(s)
.orElse("")
.replace(",", "")
.replace("&amp;", "&")
.replace("&#8217;", "'")
.replace("&#8211;", "-");
}
}

0 comments on commit 8462464

Please sign in to comment.