Skip to content

Commit

Permalink
Replaced Inserting with Upserting again in MessageBundle.properties
Browse files Browse the repository at this point in the history
Fixed wrong tab size for indention
  • Loading branch information
Gogs committed Jan 17, 2018
1 parent f4ec574 commit 9695dad
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 285 deletions.
362 changes: 180 additions & 182 deletions src/main/java/gov/loc/repository/bagit/conformance/ManifestChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,200 +20,198 @@
import gov.loc.repository.bagit.util.PathUtils;

/**
* Part of the BagIt conformance suite.
* This checker checks for various problems related to the manifests in a bag.
* Part of the BagIt conformance suite. This checker checks for various problems
* related to the manifests in a bag.
*/
@SuppressWarnings({"PMD.UseLocaleWithCaseConversions"})
public final class ManifestChecker {
// Logger used by every check in this class.
private static final Logger logger = LoggerFactory.getLogger(ManifestChecker.class);
// Message templates looked up by key from the "MessageBundle" resource bundle.
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

// File-name patterns; every letter is a two-case character class, so each matches in any letter case.
// "Thumbs.db"
private static final String THUMBS_DB_FILE = "[Tt][Hh][Uu][Mm][Bb][Ss]\\.[Dd][Bb]";
// ".DS_Store"
private static final String DS_STORE_FILE = "\\.[Dd][Ss]_[Ss][Tt][Oo][Rr][Ee]";
// ".Spotlight-V100"
private static final String SPOTLIGHT_FILE = "\\.[Ss][Pp][Oo][Tt][Ll][Ii][Gg][Hh][Tt]-[Vv]100";
// ".Trashes", optionally with '_' plus one arbitrary character after the leading dot (the inner '.' is unescaped)
private static final String TRASHES_FILE = "\\.(_.)?[Tt][Rr][Aa][Ss][Hh][Ee][Ss]";
// ".fseventsd"
private static final String FS_EVENTS_FILE = "\\.[Ff][Ss][Ee][Vv][Ee][Nn][Tt][Ss][Dd]";
// Whole-string pattern: anything, then "data/", then exactly one of the file names above.
private static final String OS_FILES_REGEX = ".*data/("
    + THUMBS_DB_FILE + "|"
    + DS_STORE_FILE + "|"
    + SPOTLIGHT_FILE + "|"
    + TRASHES_FILE + "|"
    + FS_EVENTS_FILE
    + ")";

/* Private so the class cannot be instantiated; the members shown are all static. */
private ManifestChecker(){
  // nothing to initialise
}

/*
public final class ManifestChecker{

// Logger used by every check in this class.
private static final Logger logger = LoggerFactory.getLogger(ManifestChecker.class);
// Message templates looked up by key from the "MessageBundle" resource bundle.
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

// File-name patterns; every letter is a two-case character class, so each matches in any letter case.
// "Thumbs.db"
private static final String THUMBS_DB_FILE = "[Tt][Hh][Uu][Mm][Bb][Ss]\\.[Dd][Bb]";
// ".DS_Store"
private static final String DS_STORE_FILE = "\\.[Dd][Ss]_[Ss][Tt][Oo][Rr][Ee]";
// ".Spotlight-V100"
private static final String SPOTLIGHT_FILE = "\\.[Ss][Pp][Oo][Tt][Ll][Ii][Gg][Hh][Tt]-[Vv]100";
// ".Trashes", optionally with '_' plus one arbitrary character after the leading dot (the inner '.' is unescaped)
private static final String TRASHES_FILE = "\\.(_.)?[Tt][Rr][Aa][Ss][Hh][Ee][Ss]";
// ".fseventsd"
private static final String FS_EVENTS_FILE = "\\.[Ff][Ss][Ee][Vv][Ee][Nn][Tt][Ss][Dd]";
// Whole-string pattern: anything, then "data/", then exactly one of the file names above.
private static final String OS_FILES_REGEX = ".*data/("
    + THUMBS_DB_FILE + "|"
    + DS_STORE_FILE + "|"
    + SPOTLIGHT_FILE + "|"
    + TRASHES_FILE + "|"
    + FS_EVENTS_FILE
    + ")";

/* Private so the class cannot be instantiated; the members shown are all static. */
private ManifestChecker(){
  // nothing to initialise
}

/*
* Check for all the manifest specific potential problems
*/
public static void checkManifests(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagitFileFormatException{
boolean missingTagManifest = true;
try(final DirectoryStream<Path> files = Files.newDirectoryStream(bagitDir)){
for(final Path file : files){
final String filename = PathUtils.getFilename(file);
if(filename.contains("manifest-")){
if(filename.startsWith("manifest-")){
checkData(file, encoding, warnings, warningsToIgnore, true);
}
else{
checkData(file, encoding, warnings, warningsToIgnore, false);
missingTagManifest = false;
}

final String algorithm = filename.split("[-\\.]")[1];
checkAlgorthm(algorithm, warnings, warningsToIgnore);
}
*/
public static void checkManifests(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagitFileFormatException{

boolean missingTagManifest = true;
try(final DirectoryStream<Path> files = Files.newDirectoryStream(bagitDir)){
for(final Path file : files){
final String filename = PathUtils.getFilename(file);
if(filename.contains("manifest-")){
if(filename.startsWith("manifest-")){
checkData(file, encoding, warnings, warningsToIgnore, true);
} else{
checkData(file, encoding, warnings, warningsToIgnore, false);
missingTagManifest = false;
}

final String algorithm = filename.split("[-\\.]")[1];
checkAlgorthm(algorithm, warnings, warningsToIgnore);
}
}
}
}

if(!warningsToIgnore.contains(BagitWarning.MISSING_TAG_MANIFEST) && missingTagManifest){
logger.warn(messages.getString("bag_missing_tag_manifest_warning"), bagitDir);
warnings.add(BagitWarning.MISSING_TAG_MANIFEST);
}
}

/*

if(!warningsToIgnore.contains(BagitWarning.MISSING_TAG_MANIFEST) && missingTagManifest){
logger.warn(messages.getString("bag_missing_tag_manifest_warning"), bagitDir);
warnings.add(BagitWarning.MISSING_TAG_MANIFEST);
}
}

/*
* Check for a "bag within a bag" and for relative paths in the manifests
*/
private static void checkData(final Path manifestFile, final Charset encoding, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final boolean isPayloadManifest) throws IOException, InvalidBagitFileFormatException{
try(final BufferedReader reader = Files.newBufferedReader(manifestFile, encoding)){
final Set<String> paths = new HashSet<>();

String line = reader.readLine();
while(line != null){
String path = parsePath(line);

path = checkForManifestCreatedWithMD5SumTools(path, warnings, warningsToIgnore);

if(!warningsToIgnore.contains(BagitWarning.DIFFERENT_CASE) && paths.contains(path.toLowerCase())){
logger.warn(messages.getString("different_case_warning"), manifestFile, path);
warnings.add(BagitWarning.DIFFERENT_CASE);
}
paths.add(path.toLowerCase());

if(encoding.name().startsWith("UTF")){
checkNormalization(path, manifestFile.getParent(), warnings, warningsToIgnore);
}

checkForBagWithinBag(line, warnings, warningsToIgnore, isPayloadManifest);

checkForRelativePaths(line, warnings, warningsToIgnore, manifestFile);

checkForOSSpecificFiles(line, warnings, warningsToIgnore, manifestFile);

line = reader.readLine();
*/
/*
 * Read one manifest file line by line and run every per-entry check on it.
 *
 * manifestFile      - manifest to read; its parent directory is the root used for the normalization check
 * encoding          - charset used to read the manifest; the normalization check only runs when its name starts with "UTF"
 * warnings          - receives every warning that is raised
 * warningsToIgnore  - warnings that must not be reported
 * isPayloadManifest - forwarded to the bag-within-a-bag check
 *
 * Throws IOException if the manifest (or a directory inspected by the normalization check) cannot be read,
 * and InvalidBagitFileFormatException if a line cannot be split into checksum and path.
 */
private static void checkData(final Path manifestFile, final Charset encoding, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final boolean isPayloadManifest) throws IOException, InvalidBagitFileFormatException{
  try(final BufferedReader manifestReader = Files.newBufferedReader(manifestFile, encoding)){
    // lower-cased form of every path seen so far in this manifest
    final Set<String> seenLowerCasePaths = new HashSet<>();

    for(String entry = manifestReader.readLine(); entry != null; entry = manifestReader.readLine()){
      final String entryPath = checkForManifestCreatedWithMD5SumTools(parsePath(entry), warnings, warningsToIgnore);
      final String lowerCasePath = entryPath.toLowerCase();

      // a hit means an earlier entry has the same path once letter case is ignored
      if(!warningsToIgnore.contains(BagitWarning.DIFFERENT_CASE) && seenLowerCasePaths.contains(lowerCasePath)){
        logger.warn(messages.getString("different_case_warning"), manifestFile, entryPath);
        warnings.add(BagitWarning.DIFFERENT_CASE);
      }
      seenLowerCasePaths.add(lowerCasePath);

      if(encoding.name().startsWith("UTF")){
        checkNormalization(entryPath, manifestFile.getParent(), warnings, warningsToIgnore);
      }

      // the remaining checks look at the raw line (checksum included), not just the path
      checkForBagWithinBag(entry, warnings, warningsToIgnore, isPayloadManifest);
      checkForRelativePaths(entry, warnings, warningsToIgnore, manifestFile);
      checkForOSSpecificFiles(entry, warnings, warningsToIgnore, manifestFile);
    }
  }
}
}

/*
 * Extract the path portion of a manifest line.
 *
 * The line is split at the first run of whitespace into at most two pieces; the second piece
 * (everything after that whitespace) is returned unchanged.
 *
 * Throws InvalidBagitFileFormatException when the line contains no whitespace to split on.
 */
static String parsePath(final String line) throws InvalidBagitFileFormatException{
  final String[] checksumAndPath = line.split("\\s+", 2);
  if(checksumAndPath.length >= 2){
    return checksumAndPath[1];
  }

  final String template = messages.getString("manifest_line_violated_spec_error");
  throw new InvalidBagitFileFormatException(MessageFormatter.format(template, line).getMessage());
}

/*
 * Detect a manifest path that carries a leading '*' and return the path without it.
 *
 * path             - path portion of a manifest line; may be empty
 * warnings         - receives MD5SUM_TOOL_GENERATED_MANIFEST when a leading '*' is found
 * warningsToIgnore - warnings that must not be reported
 *
 * Returns the path with one leading '*' removed, or the path unchanged when there is none.
 * The '*' is stripped even when the warning itself is being ignored.
 */
private static String checkForManifestCreatedWithMD5SumTools(final String path, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
  // startsWith is safe on an empty path (a line such as "<checksum> " parses to ""),
  // whereas charAt(0) would throw StringIndexOutOfBoundsException.
  if(!path.startsWith("*")){
    return path;
  }

  if(!warningsToIgnore.contains(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST)){
    logger.warn(messages.getString("md5sum_generated_line_warning"), path);
    warnings.add(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST);
  }

  return path.substring(1);
}

/*
* Check that the file specified has not changed its normalization (i.e. have the bytes changed but it still looks the same?)
*/
private static void checkNormalization(final String path, final Path rootDir, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore) throws IOException{
if(!warningsToIgnore.contains(BagitWarning.DIFFERENT_NORMALIZATION)){

final Path fileToCheck = rootDir.resolve(path).normalize();
final Path dirToCheck = fileToCheck.getParent();
if(dirToCheck == null){
final String formattedMessage = messages.getString("cannot_access_parent_path_error");
throw new IOException(MessageFormatter.format(formattedMessage, fileToCheck).getMessage()); //to satisfy findbugs
}

static String parsePath(final String line) throws InvalidBagitFileFormatException{
final String[] parts = line.split("\\s+", 2);
if(parts.length < 2){
final String formattedMessage = messages.getString("manifest_line_violated_spec_error");
throw new InvalidBagitFileFormatException(MessageFormatter.format(formattedMessage, line).getMessage());
}
final String normalizedFileToCheck = normalizePathToNFD(fileToCheck);

try(final DirectoryStream<Path> files = Files.newDirectoryStream(dirToCheck)){
for(final Path file : files){
final String normalizedFile = normalizePathToNFD(file);

if(!file.equals(fileToCheck) && normalizedFileToCheck.equals(normalizedFile)){
logger.warn(messages.getString("different_normalization_in_manifest_warning"), fileToCheck);
warnings.add(BagitWarning.DIFFERENT_NORMALIZATION);
}
}

return parts[1];
}

/*
 * Detect a manifest path that carries a leading '*' and return the path without it.
 *
 * path             - path portion of a manifest line; may be empty
 * warnings         - receives MD5SUM_TOOL_GENERATED_MANIFEST when a leading '*' is found
 * warningsToIgnore - warnings that must not be reported
 *
 * Returns the path with one leading '*' removed, or the path unchanged when there is none.
 * The '*' is stripped even when the warning itself is being ignored.
 */
private static String checkForManifestCreatedWithMD5SumTools(final String path, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
  // startsWith is safe on an empty path (a line such as "<checksum> " parses to ""),
  // whereas charAt(0) would throw StringIndexOutOfBoundsException.
  if(!path.startsWith("*")){
    return path;
  }

  if(!warningsToIgnore.contains(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST)){
    logger.warn(messages.getString("md5sum_generated_line_warning"), path);
    warnings.add(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST);
  }

  return path.substring(1);
}

/*
* Check that the file specified has not changed its normalization (i.e. have the bytes changed but it still looks the same?)
*/
/*
 * Warn when a directory entry differs from the manifest path as a Path but is identical to it
 * once both are normalized to NFD.
 *
 * path             - path taken from the manifest, resolved against rootDir
 * rootDir          - directory the manifest path is relative to
 * warnings         - receives DIFFERENT_NORMALIZATION for each such entry found
 * warningsToIgnore - warnings that must not be reported; the whole check is skipped when it
 *                    contains DIFFERENT_NORMALIZATION
 *
 * Throws IOException when the resolved file has no parent or its parent directory cannot be listed.
 */
private static void checkNormalization(final String path, final Path rootDir, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore) throws IOException{
  if(warningsToIgnore.contains(BagitWarning.DIFFERENT_NORMALIZATION)){
    return;
  }

  final Path expectedFile = rootDir.resolve(path).normalize();
  final Path parentDir = expectedFile.getParent();
  if(parentDir == null){
    final String template = messages.getString("cannot_access_parent_path_error");
    throw new IOException(MessageFormatter.format(template, expectedFile).getMessage()); //to satisfy findbugs
  }
  final String expectedNfd = normalizePathToNFD(expectedFile);

  try(final DirectoryStream<Path> siblings = Files.newDirectoryStream(parentDir)){
    for(final Path sibling : siblings){
      final String siblingNfd = normalizePathToNFD(sibling);
      final boolean sameAfterNormalization = expectedNfd.equals(siblingNfd);

      // the exact file itself is not a mismatch; only a differently-encoded twin is
      if(!sibling.equals(expectedFile) && sameAfterNormalization){
        logger.warn(messages.getString("different_normalization_in_manifest_warning"), expectedFile);
        warnings.add(BagitWarning.DIFFERENT_NORMALIZATION);
      }
    }
  }
}
}

/*
}

/*
* Normalize to Canonical decomposition.
*/
/*
 * Return the string form of the path, normalized to Unicode form NFD.
 */
static String normalizePathToNFD(final Path path){
  final String pathText = path.toString();
  return Normalizer.normalize(pathText, Normalizer.Form.NFD);
}
/*
*/
/*
 * Return the string form of the path, normalized to Unicode form NFD.
 */
static String normalizePathToNFD(final Path path){
  final String pathText = path.toString();
  return Normalizer.normalize(pathText, Normalizer.Form.NFD);
}

/*
* check for a bag within a bag
*/
/*
 * Add BAG_WITHIN_A_BAG when a payload-manifest line contains the text "manifest-".
 *
 * line              - raw manifest line
 * warnings          - receives the warning
 * warningsToIgnore  - warnings that must not be reported
 * isPayloadManifest - the check only applies when this is true
 */
private static void checkForBagWithinBag(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final boolean isPayloadManifest){
  if(warningsToIgnore.contains(BagitWarning.BAG_WITHIN_A_BAG)){
    return;
  }
  if(!isPayloadManifest){
    return;
  }
  if(!line.contains("manifest-")){
    return;
  }

  logger.warn(messages.getString("bag_within_bag_warning"));
  warnings.add(BagitWarning.BAG_WITHIN_A_BAG);
}
/*
*/
/*
 * Add BAG_WITHIN_A_BAG when a payload-manifest line contains the text "manifest-".
 *
 * line              - raw manifest line
 * warnings          - receives the warning
 * warningsToIgnore  - warnings that must not be reported
 * isPayloadManifest - the check only applies when this is true
 */
private static void checkForBagWithinBag(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final boolean isPayloadManifest){
  if(warningsToIgnore.contains(BagitWarning.BAG_WITHIN_A_BAG)){
    return;
  }
  if(!isPayloadManifest){
    return;
  }
  if(!line.contains("manifest-")){
    return;
  }

  logger.warn(messages.getString("bag_within_bag_warning"));
  warnings.add(BagitWarning.BAG_WITHIN_A_BAG);
}

/*
* Check for relative paths (i.e. ./) in the manifest
*/
/*
 * Add LEADING_DOT_SLASH when the manifest line contains "./" anywhere.
 *
 * line             - raw manifest line
 * warnings         - receives the warning
 * warningsToIgnore - warnings that must not be reported
 * manifestFile     - manifest being checked; only used in the log message
 */
private static void checkForRelativePaths(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final Path manifestFile){
  final boolean suppressed = warningsToIgnore.contains(BagitWarning.LEADING_DOT_SLASH);
  if(suppressed || !line.contains("./")){
    return;
  }

  logger.warn(messages.getString("leading_dot_slash_warning"), manifestFile, line);
  warnings.add(BagitWarning.LEADING_DOT_SLASH);
}
/*
*/
/*
 * Add LEADING_DOT_SLASH when the manifest line contains "./" anywhere.
 *
 * line             - raw manifest line
 * warnings         - receives the warning
 * warningsToIgnore - warnings that must not be reported
 * manifestFile     - manifest being checked; only used in the log message
 */
private static void checkForRelativePaths(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final Path manifestFile){
  final boolean suppressed = warningsToIgnore.contains(BagitWarning.LEADING_DOT_SLASH);
  if(suppressed || !line.contains("./")){
    return;
  }

  logger.warn(messages.getString("leading_dot_slash_warning"), manifestFile, line);
  warnings.add(BagitWarning.LEADING_DOT_SLASH);
}

/*
* Check for OS specific files like .DS_Store or Thumbs.db
*/
/*
 * Add OS_SPECIFIC_FILES when the whole manifest line matches OS_FILES_REGEX.
 *
 * line             - raw manifest line; String.matches requires the entire line to match
 * warnings         - receives the warning
 * warningsToIgnore - warnings that must not be reported
 * manifestFile     - manifest being checked; only used in the log message
 */
private static void checkForOSSpecificFiles(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final Path manifestFile){
  if(warningsToIgnore.contains(BagitWarning.OS_SPECIFIC_FILES)){
    return;
  }

  final boolean namesOsFile = line.matches(OS_FILES_REGEX);
  if(namesOsFile){
    logger.warn(messages.getString("os_specific_files_warning"), manifestFile, line);
    warnings.add(BagitWarning.OS_SPECIFIC_FILES);
  }
}
/*
*/
/*
 * Add OS_SPECIFIC_FILES when the whole manifest line matches OS_FILES_REGEX.
 *
 * line             - raw manifest line; String.matches requires the entire line to match
 * warnings         - receives the warning
 * warningsToIgnore - warnings that must not be reported
 * manifestFile     - manifest being checked; only used in the log message
 */
private static void checkForOSSpecificFiles(final String line, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final Path manifestFile){
  if(warningsToIgnore.contains(BagitWarning.OS_SPECIFIC_FILES)){
    return;
  }

  final boolean namesOsFile = line.matches(OS_FILES_REGEX);
  if(namesOsFile){
    logger.warn(messages.getString("os_specific_files_warning"), manifestFile, line);
    warnings.add(BagitWarning.OS_SPECIFIC_FILES);
  }
}

/*
* Check for anything weaker than SHA-512
*/
/*
 * Warn about the checksum algorithm named in a manifest file name.
 *
 * algorithm        - algorithm token (compared case-insensitively); checkManifests passes the piece
 *                    of the file name between "manifest-" and the next '-' or '.', e.g. "sha512"
 * warnings         - receives WEAK_CHECKSUM_ALGORITHM or NON_STANDARD_ALGORITHM
 * warningsToIgnore - warnings that must not be reported
 *
 * A token starting with "MD", or equal to SHA, SHA1, SHA224, SHA256 or SHA384, is weak.
 * Any other token except SHA512 / SHA-512 is non-standard.
 */
static void checkAlgorthm(final String algorithm, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
  final String upperCaseAlg = algorithm.toUpperCase();
  final boolean isWeak = upperCaseAlg.startsWith("MD") || upperCaseAlg.matches("SHA(1|224|256|384)?");
  // The token taken from a file name never contains '-', so the hyphen-free spelling must be
  // accepted; the hyphenated one is kept for callers that pass it directly.
  final boolean isSha512 = "SHA512".equals(upperCaseAlg) || "SHA-512".equals(upperCaseAlg);

  if(isWeak){
    // A weak algorithm is only ever reported as weak; when that warning is ignored it must not
    // fall through and be reported as non-standard instead.
    if(!warningsToIgnore.contains(BagitWarning.WEAK_CHECKSUM_ALGORITHM)){
      logger.warn(messages.getString("weak_algorithm_warning"), algorithm);
      warnings.add(BagitWarning.WEAK_CHECKSUM_ALGORITHM);
    }
  }
  else if(!isSha512 && !warningsToIgnore.contains(BagitWarning.NON_STANDARD_ALGORITHM)){
    logger.warn(messages.getString("non_standard_algorithm_warning"), algorithm);
    warnings.add(BagitWarning.NON_STANDARD_ALGORITHM);
  }
}

//for unit test only
/* Expose the private OS_FILES_REGEX pattern string so unit tests can read it. */
static String getOsFilesRegex(){
  return OS_FILES_REGEX;
}
*/
/*
 * Warn about the checksum algorithm named in a manifest file name.
 *
 * algorithm        - algorithm token (compared case-insensitively); checkManifests passes the piece
 *                    of the file name between "manifest-" and the next '-' or '.', e.g. "sha512"
 * warnings         - receives WEAK_CHECKSUM_ALGORITHM or NON_STANDARD_ALGORITHM
 * warningsToIgnore - warnings that must not be reported
 *
 * A token starting with "MD", or equal to SHA, SHA1, SHA224, SHA256 or SHA384, is weak.
 * Any other token except SHA512 / SHA-512 is non-standard.
 */
static void checkAlgorthm(final String algorithm, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
  final String upperCaseAlg = algorithm.toUpperCase();
  final boolean isWeak = upperCaseAlg.startsWith("MD") || upperCaseAlg.matches("SHA(1|224|256|384)?");
  // The token taken from a file name never contains '-', so the hyphen-free spelling must be
  // accepted; the hyphenated one is kept for callers that pass it directly.
  final boolean isSha512 = "SHA512".equals(upperCaseAlg) || "SHA-512".equals(upperCaseAlg);

  if(isWeak){
    // A weak algorithm is only ever reported as weak; when that warning is ignored it must not
    // fall through and be reported as non-standard instead.
    if(!warningsToIgnore.contains(BagitWarning.WEAK_CHECKSUM_ALGORITHM)){
      logger.warn(messages.getString("weak_algorithm_warning"), algorithm);
      warnings.add(BagitWarning.WEAK_CHECKSUM_ALGORITHM);
    }
  }
  else if(!isSha512 && !warningsToIgnore.contains(BagitWarning.NON_STANDARD_ALGORITHM)){
    logger.warn(messages.getString("non_standard_algorithm_warning"), algorithm);
    warnings.add(BagitWarning.NON_STANDARD_ALGORITHM);
  }
}

//for unit test only
/* Expose the private OS_FILES_REGEX pattern string so unit tests can read it. */
static String getOsFilesRegex(){
  return OS_FILES_REGEX;
}

}
Loading

0 comments on commit 9695dad

Please sign in to comment.