Skip to content

Commit

Permalink
crc32c first version, except the complete upload part meta data header
Browse files Browse the repository at this point in the history
  • Loading branch information
vintmd committed Oct 14, 2020
1 parent c5006e6 commit 84cf837
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 13 deletions.
65 changes: 65 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CRC32CCheckSum.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.apache.hadoop.fs;

import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.math.BigInteger;

/**
 * A {@link FileChecksum} that carries the CRC32C value of an object.
 *
 * <p>The value is held in a {@code long} and serialized as eight bytes.
 * A value of {@code 0} doubles as the "no checksum" marker: it is the value
 * of a default-constructed instance, it is what the string constructor falls
 * back to when its argument is missing or is not a decimal integer, and
 * {@link #getBytes()} returns an empty array for it (even though
 * {@link #getLength()} always reports eight).
 */
public class CRC32CCheckSum extends FileChecksum {
    private static final String ALGORITHM_NAME = "CRC32C";

    /** The CRC32C value; {@code 0} means "not set". */
    private long crc32c = 0;

    /**
     * Creates an unset checksum, to be populated through
     * {@link #readFields(DataInput)}.
     */
    public CRC32CCheckSum() {
    }

    /**
     * Creates a checksum from the decimal text form of a CRC32C value.
     *
     * @param crc32cecma the CRC32C value as a decimal string; surrounding
     *                   whitespace is ignored. A {@code null}, blank or
     *                   non-numeric argument yields an unset checksum
     *                   instead of an exception.
     */
    public CRC32CCheckSum(String crc32cecma) {
        this.crc32c = parseOrZero(crc32cecma);
    }

    /**
     * Parses a decimal string into a {@code long}, returning {@code 0} when
     * the text is absent or malformed.
     */
    private static long parseOrZero(String text) {
        if (text == null) {
            // new BigInteger(null) would throw NullPointerException, which the
            // NumberFormatException handler below does not cover.
            return 0L;
        }
        try {
            // BigInteger tolerates values wider than a signed long; only the
            // low 64 bits are kept.
            return new BigInteger(text.trim()).longValue();
        } catch (NumberFormatException ignored) {
            // Deliberate best effort: an unparsable value means "no checksum".
            return 0L;
        }
    }

    /**
     * @return the fixed algorithm name {@code "CRC32C"}
     */
    @Override
    public String getAlgorithmName() {
        return CRC32CCheckSum.ALGORITHM_NAME;
    }

    /**
     * @return the serialized size in bytes, i.e. the size of one {@code long}
     */
    @Override
    public int getLength() {
        return Long.SIZE / Byte.SIZE;
    }

    /**
     * @return the serialized form produced by {@link #write(DataOutput)}, or
     *         an empty array when the checksum is unset ({@code 0})
     */
    @Override
    public byte[] getBytes() {
        if (this.crc32c == 0) {
            return new byte[0];
        }
        return WritableUtils.toByteArray(this);
    }

    /**
     * Writes the checksum as a single {@code long}.
     *
     * @param dataOutput the sink to write to
     * @throws IOException if the underlying write fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.crc32c);
    }

    /**
     * Replaces the checksum with a single {@code long} read from the input.
     *
     * @param dataInput the source to read from
     * @throws IOException if the underlying read fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.crc32c = dataInput.readLong();
    }

    @Override
    public String toString() {
        return "CRC32CChecksum{" +
                "crc32c=" + crc32c +
                '}';
    }
}
7 changes: 7 additions & 0 deletions src/main/java/org/apache/hadoop/fs/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ private Constants() {
// Suffix for local cache file name
public static final String BLOCK_TMP_FILE_SUFFIX = "_local_block_cache";

// Crc32c server response header key
public static final String CRC32C_RESP_HEADER = "x-cos-hash-crc32c";
// Crc32c agent request header key
public static final String CRC32C_REQ_HEADER = "x-cos-crc32c-flag";
// Crc32c agent request header value
public static final String CRC32C_REQ_HEADER_VAL = "cosn";

// Maximum number of blocks uploaded in chunks.
public static final int MAX_PART_NUM = 10000;
// The maximum size of a single block.
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CosFileSystem.java
Original file line number Diff line number Diff line change
Expand Up @@ -820,13 +820,21 @@ public FileChecksum getFileChecksum(Path f, long length) throws IOException {
Preconditions.checkArgument(length >= 0);
LOG.debug("Call the checksum for the path: {}.", f);

// NOTE: when both checksums are enabled, CRC64 is checked first and wins.
// TODO: support both CRCs at the same time and decide how a caller selects which one it wants.
if (this.getConf().getBoolean(CosNConfigKeys.CRC64_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC64_CHECKSUM_ENABLED)) {
Path absolutePath = makeAbsolute(f);
String key = pathToKey(absolutePath);
FileMetadata fileMetadata = this.store.retrieveMetadata(key);
String crc64ecm = fileMetadata.getCrc64ecm();
return crc64ecm != null ? new CRC64Checksum(crc64ecm) : super.getFileChecksum(f, length);
} else if (this.getConf().getBoolean(CosNConfigKeys.CRC32C_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC32C_CHECKSUM_ENABLED)) {
Path absolutePath = makeAbsolute(f);
String key = pathToKey(absolutePath);
FileMetadata fileMetadata = this.store.retrieveMetadata(key);
String crc32cm = fileMetadata.getCrc32cm();
return crc32cm != null ? new CRC32CCheckSum(crc32cm) : super.getFileChecksum(f, length);
} else {
// disabled
return super.getFileChecksum(f, length);
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CosNConfigKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,12 @@ public class CosNConfigKeys extends CommonConfigurationKeys {
public static final int DEFAULT_TRAFFIC_LIMIT = -1;

// checksum
// crc64
public static final String CRC64_CHECKSUM_ENABLED = "fs.cosn.crc64.checksum.enabled";
public static final boolean DEFAULT_CRC64_CHECKSUM_ENABLED = false;
// crc32c
public static final String CRC32C_CHECKSUM_ENABLED = "fs.cosn.crc32c.checksum.enabled";
public static final boolean DEFAULT_CRC32C_CHECKSUM_ENABLED = false;

public static final String HTTP_PROXY_IP = "fs.cosn.http.proxy.ip";
public static final String HTTP_PROXY_PORT = "fs.cosn.http.proxy.port";
Expand Down
55 changes: 48 additions & 7 deletions src/main/java/org/apache/hadoop/fs/CosNativeFileSystemStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.qcloud.cos.utils.Jackson;
import com.qcloud.cos.utils.StringUtils;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.math3.analysis.function.Constant;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -41,6 +42,7 @@ public class CosNativeFileSystemStore implements NativeFileSystemStore {
private StorageClass storageClass;
private int maxRetryTimes;
private int trafficLimit;
private boolean openCrc32c;
private CosEncryptionSecrets encryptionSecrets;
private CustomerDomainEndpointResolver customerDomainEndpointResolver;

Expand Down Expand Up @@ -90,6 +92,9 @@ private void initCOSClient(URI uri, Configuration conf) throws IOException {
config.setHttpProtocol(HttpProtocol.https);
}

this.openCrc32c = conf.getBoolean(CosNConfigKeys.CRC32C_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC32C_CHECKSUM_ENABLED);

// Proxy settings
String httpProxyIp = conf.getTrimmed(CosNConfigKeys.HTTP_PROXY_IP);
int httpProxyPort = conf.getInt(CosNConfigKeys.HTTP_PROXY_PORT, CosNConfigKeys.DEFAULT_HTTP_PROXY_PORT);
Expand Down Expand Up @@ -200,6 +205,9 @@ private void storeFileWithRetry(String key, InputStream inputStream,
objectMetadata.setContentMD5(Base64.encodeAsString(md5Hash));
}
objectMetadata.setContentLength(length);
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

PutObjectRequest putObjectRequest =
new PutObjectRequest(bucketName, key, inputStream,
Expand Down Expand Up @@ -265,6 +273,10 @@ public void storeEmptyFile(String key) throws IOException {

ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentLength(0);
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

InputStream input = new ByteArrayInputStream(new byte[0]);
PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName,
key, input, objectMetadata);
Expand Down Expand Up @@ -310,20 +322,26 @@ public PartETag uploadPart(
String key, String uploadId, int partNum, long partSize, byte[] md5Hash) throws IOException {
LOG.debug("Upload the part to the cos key [{}]. upload id: {}, part number: {}, part size: {}",
key, uploadId, partNum, partSize);
ObjectMetadata objectMetadata = new ObjectMetadata();
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

UploadPartRequest uploadPartRequest = new UploadPartRequest();
uploadPartRequest.setBucketName(this.bucketName);
uploadPartRequest.setUploadId(uploadId);
uploadPartRequest.setInputStream(inputStream);
uploadPartRequest.setPartNumber(partNum);
uploadPartRequest.setPartSize(partSize);
uploadPartRequest.setObjectMetadata(objectMetadata);
if (null != md5Hash) {
uploadPartRequest.setMd5Digest(Base64.encodeAsString(md5Hash));
}
uploadPartRequest.setKey(key);
if (this.trafficLimit >= 0) {
uploadPartRequest.setTrafficLimit(this.trafficLimit);
}
this.setEncryptionMetadata(uploadPartRequest, new ObjectMetadata());
this.setEncryptionMetadata(uploadPartRequest, objectMetadata);

try {
UploadPartResult uploadPartResult =
Expand Down Expand Up @@ -360,12 +378,18 @@ public String getUploadId(String key) throws IOException {
return "";
}

ObjectMetadata objectMetadata = new ObjectMetadata();
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

InitiateMultipartUploadRequest initiateMultipartUploadRequest =
new InitiateMultipartUploadRequest(bucketName, key);
if (null != this.storageClass) {
initiateMultipartUploadRequest.setStorageClass(this.storageClass);
}
this.setEncryptionMetadata(initiateMultipartUploadRequest, new ObjectMetadata());
initiateMultipartUploadRequest.setObjectMetadata(objectMetadata);
this.setEncryptionMetadata(initiateMultipartUploadRequest, objectMetadata);
try {
InitiateMultipartUploadResult initiateMultipartUploadResult =
(InitiateMultipartUploadResult) this.callCOSClientWithRetry(initiateMultipartUploadRequest);
Expand All @@ -389,6 +413,7 @@ public int compare(PartETag o1, PartETag o2) {
}
});
try {
// TODO(TMD): work out how to attach the CRC32C request header to the complete-multipart-upload request through the Java SDK.
CompleteMultipartUploadRequest completeMultipartUploadRequest =
new CompleteMultipartUploadRequest(bucketName, key, uploadId,
partETagList);
Expand Down Expand Up @@ -419,6 +444,7 @@ private FileMetadata QueryObjectMetadata(String key) throws IOException {

String ETag = objectMetadata.getETag();
String crc64ecm = objectMetadata.getCrc64Ecma();
String crc32cm = (String)objectMetadata.getRawMetadataValue(Constants.CRC32C_RESP_HEADER);
String versionId = objectMetadata.getVersionId();
Map<String, byte[]> userMetadata = null;
if (objectMetadata.getUserMetadata() != null) {
Expand All @@ -443,7 +469,7 @@ private FileMetadata QueryObjectMetadata(String key) throws IOException {
}
FileMetadata fileMetadata =
new FileMetadata(key, fileSize, mtime,
!key.endsWith(PATH_DELIMITER), ETag, crc64ecm, versionId, objectMetadata.getStorageClass(),
!key.endsWith(PATH_DELIMITER), ETag, crc64ecm, crc32cm, versionId, objectMetadata.getStorageClass(),
userMetadata);
LOG.debug("Retrieve the file metadata. cos key: {}, ETag:{}, length:{}, crc64ecm: {}.", key,
objectMetadata.getETag(), objectMetadata.getContentLength(), objectMetadata.getCrc64Ecma());
Expand Down Expand Up @@ -573,6 +599,10 @@ private void storeAttribute(String key, String attribute, byte[] value, boolean
objectMetadata.setUserMetadata(userMetadata);

// Build an in-place copy request (same source and destination key) to set the user-defined attributes.
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucketName, key, bucketName, key);
if (null != objectMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(objectMetadata.getStorageClass());
Expand Down Expand Up @@ -776,10 +806,10 @@ private PartialListing list(String prefix, String delimiter,
long fileLen = cosObjectSummary.getSize();
String fileEtag = cosObjectSummary.getETag();
if (cosObjectSummary.getKey().endsWith(PATH_DELIMITER) && cosObjectSummary.getSize() == 0) {
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime, false, fileEtag, null, null, cosObjectSummary.getStorageClass()));
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime, false, fileEtag, null, null, null, cosObjectSummary.getStorageClass()));
} else {
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime,
true, fileEtag, null, null, cosObjectSummary.getStorageClass()));
true, fileEtag, null, null, null, cosObjectSummary.getStorageClass()));
}
}
List<String> commonPrefixes = objectListing.getCommonPrefixes();
Expand Down Expand Up @@ -826,6 +856,10 @@ public void delete(String key) throws IOException {
public void rename(String srcKey, String dstKey) throws IOException {
LOG.debug("Rename the source cos key [{}] to the dest cos key [{}].", srcKey, dstKey);
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}
CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(bucketName, srcKey, bucketName,
dstKey);
Expand All @@ -834,7 +868,8 @@ public void rename(String srcKey, String dstKey) throws IOException {
if (null != sourceFileMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(sourceFileMetadata.getStorageClass());
}
this.setEncryptionMetadata(copyObjectRequest, new ObjectMetadata());
copyObjectRequest.setNewObjectMetadata(objectMetadata);
this.setEncryptionMetadata(copyObjectRequest, objectMetadata);

if (null != this.customerDomainEndpointResolver) {
if (null != this.customerDomainEndpointResolver.getEndpoint()) {
Expand All @@ -857,13 +892,19 @@ public void rename(String srcKey, String dstKey) throws IOException {
@Override
public void copy(String srcKey, String dstKey) throws IOException {
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
if (openCrc32c) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(bucketName, srcKey, bucketName, dstKey);
FileMetadata sourceFileMetadata = this.retrieveMetadata(srcKey);
if (null != sourceFileMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(sourceFileMetadata.getStorageClass());
}
this.setEncryptionMetadata(copyObjectRequest, new ObjectMetadata());
copyObjectRequest.setNewObjectMetadata(objectMetadata);
this.setEncryptionMetadata(copyObjectRequest, objectMetadata);
if (null != this.customerDomainEndpointResolver) {
if (null != this.customerDomainEndpointResolver.getEndpoint()) {
copyObjectRequest.setSourceEndpointBuilder(this.customerDomainEndpointResolver);
Expand Down
18 changes: 12 additions & 6 deletions src/main/java/org/apache/hadoop/fs/FileMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class FileMetadata {
private final boolean isFile;
private final String ETag;
private final String crc64ecm;
private final String crc32cm;
private final String versionId;
private final String storageClass;
private final Map<String, byte[]> userAttributes;
Expand All @@ -33,27 +34,28 @@ public FileMetadata(String key, long length, long lastModified,
}

public FileMetadata(String key, long length, long lastModified, boolean isFile, String ETag) {
this(key, length, lastModified, isFile, ETag, null, null);
this(key, length, lastModified, isFile, ETag, null, null, null);
}

public FileMetadata(String key, long length, long lastModified, boolean isFile, String eTag, String crc64ecm,
String versionId) {
this(key, length, lastModified, isFile, eTag, crc64ecm, versionId, null, null);
String crc32cm, String versionId) {
this(key, length, lastModified, isFile, eTag, crc64ecm, crc32cm, versionId, null, null);
}

public FileMetadata(String key, long length, long lastModified, boolean isFile, String eTag, String crc64ecm,
String versionId, String storageClass) {
this(key, length, lastModified, isFile, eTag, crc64ecm, versionId, storageClass, null);
String crc32cm, String versionId, String storageClass) {
this(key, length, lastModified, isFile, eTag, crc64ecm, crc32cm, versionId, storageClass, null);
}

public FileMetadata(String key, long length, long lastModified, boolean isFile, String eTag, String crc64ecm,
String versionId, String storageClass, Map<String, byte[]> userAttributes) {
String crc32cm, String versionId, String storageClass, Map<String, byte[]> userAttributes) {
this.key = key;
this.length = length;
this.lastModified = lastModified;
this.isFile = isFile;
this.ETag = eTag;
this.crc64ecm = crc64ecm;
this.crc32cm = crc32cm;
this.versionId = versionId;
this.storageClass = storageClass;
this.userAttributes = userAttributes;
Expand Down Expand Up @@ -83,6 +85,10 @@ public String getCrc64ecm() {
return crc64ecm;
}

/**
 * Returns the CRC32C value recorded for this entry.
 *
 * @return the CRC32C string as supplied to the constructor; may be {@code null}
 */
public String getCrc32cm() {
    return this.crc32cm;
}

public String getStorageClass() {
return storageClass;
}
Expand Down

0 comments on commit 84cf837

Please sign in to comment.