Skip to content

Commit

Permalink
WatchImporter: Treat locked files as open
Browse files Browse the repository at this point in the history
  • Loading branch information
ato committed Sep 20, 2023
1 parent 4d32548 commit 41e3d95
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion ui/src/bamboo/task/WatchImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.channels.FileChannel;
import java.nio.file.*;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -123,11 +124,26 @@ void handleOpenWarc(Config.Watch watch, Path path) throws IOException {
private void handleClosedWarc(Config.Watch watch, Path path) throws IOException {
log.finest("handleClosedWarc(" + path + ")");

long size = Files.size(path);
if (size == 0) {
return; // ignore empty files
}

// Pywb flocks open files instead of renaming them, so check for a file lock
try (FileChannel channel = FileChannel.open(path, StandardOpenOption.READ)) {
var lock = channel.tryLock();
if (lock == null) {
log.finest("WARC has file lock, treating as still open: " + path);
handleOpenWarc(watch, path);
return;
}
lock.release();
}

String filename = path.getFileName().toString();
Warc warc = warcs.getOrNullByFilename(filename);
Crawl crawl = crawls.get(watch.crawlId);

long size = Files.size(path);
String digest = Scrub.calculateDigest("SHA-256", path);

log.info("Moving now-closed WARC " + path);
Expand Down

0 comments on commit 41e3d95

Please sign in to comment.