Skip to content

Commit

Permalink
CR: Javadoc + method on store file metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
original-brownbear committed Feb 7, 2020
1 parent 61a468e commit 8794541
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

package org.elasticsearch.index.store;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.lucene.store.ByteArrayIndexInput;

import java.io.IOException;
import java.text.ParseException;
Expand Down Expand Up @@ -100,6 +102,29 @@ public String checksum() {
return this.checksum;
}

/**
 * Tells whether the bytes exposed by {@link #hash()} are the complete contents of the file this instance refers to.
 * <p>
 * This can only be the case when the hash is exactly as long as the file. When it is, the checksum that Lucene extracts from the
 * hash bytes is additionally compared against {@link #checksum()}.
 *
 * @return {@code true} iff {@link #hash()} will return the actual file contents
 */
public boolean hashEqualsContents() {
    // A hash whose length differs from the file length cannot be the file itself.
    if (hash.length != length) {
        return false;
    }
    try {
        final ByteArrayIndexInput hashInput = new ByteArrayIndexInput("store_file", hash.bytes, hash.offset, hash.length);
        final String checksumFromHash = Store.digestToString(CodecUtil.retrieveChecksum(hashInput));
        final boolean checksumsMatch = checksumFromHash.equals(checksum);
        assert checksumsMatch : "Checksums did not match for [" + this + "] which has a hash of [" + hash + "]";
        return checksumsMatch;
    } catch (Exception e) {
        // Lucene could not extract a checksum from the hash bytes, so there is nothing to verify against the checksum of the
        // original file. This is not expected to happen because Lucene files are assumed to always carry the checksum footer;
        // trip an assertion (the AssertionError detail carries the original exception) and otherwise report "not equal".
        assert false : new AssertionError("Saw exception for hash [" + hash + "] but expected it to be Lucene file", e);
        return false;
    }
}

/**
* Returns <code>true</code> iff the length and the checksums are the same, otherwise <code>false</code>.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp

// NOTE(review): presumably the identifier prefix of data blobs that are physically written to the repository, as opposed to
// the virtual prefix declared below -- confirm against the usages of this constant.
private static final String UPLOADED_DATA_BLOB_PREFIX = "__";

/**
* Prefix used for the identifiers of data blobs that were not actually written to the repository physically because their contents are
* already stored in the metadata referencing them, i.e. in {@link BlobStoreIndexShardSnapshot} and
* {@link BlobStoreIndexShardSnapshots}. This is the case for files for which {@link StoreFileMetaData#hashEqualsContents()} is
* {@code true}.
*/
private static final String VIRTUAL_DATA_BLOB_PREFIX = "v__";

/**
Expand Down Expand Up @@ -1521,7 +1527,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s

// We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
// directly in the shard level metadata in this case
final boolean needsWrite = md.hash().length != md.length();
final boolean needsWrite = md.hashEqualsContents() == false;
indexTotalFileCount += md.length();
indexTotalNumberOfFiles++;

Expand Down

0 comments on commit 8794541

Please sign in to comment.