diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationTest.java index 6664f4bbb2..c8faf9e10c 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationTest.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationTest.java @@ -56,6 +56,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Arrays.stream; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CREDENTIAL_PROVIDER_PATH; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThrows; import com.google.api.client.http.HttpResponseException; @@ -64,7 +65,17 @@ import com.google.cloud.hadoop.fs.gcs.auth.AbstractDelegationTokenBinding; import com.google.cloud.hadoop.fs.gcs.auth.TestDelegationTokenBindingImpl; import com.google.cloud.hadoop.gcsio.*; +import com.google.cloud.hadoop.gcsio.CreateBucketOptions; +import com.google.cloud.hadoop.gcsio.FolderInfo; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorage; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemIntegrationHelper; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; +import com.google.cloud.hadoop.gcsio.ListFolderOptions; +import com.google.cloud.hadoop.gcsio.MethodOutcome; +import com.google.cloud.hadoop.gcsio.StorageResourceId; import com.google.cloud.hadoop.gcsio.testing.InMemoryGoogleCloudStorage; import com.google.cloud.hadoop.util.AccessTokenProvider; import com.google.cloud.hadoop.util.ApiErrorExtractor; @@ -81,7 +92,9 @@ import java.security.PrivilegedExceptionAction; import java.util.ArrayList; 
import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Random; import java.util.UUID; import java.util.function.Function; import org.apache.hadoop.conf.Configuration; @@ -2319,6 +2332,184 @@ public void testThreadTraceEnabledRename() throws Exception { assertThat(ghfs.exists(dest)).isTrue(); } + @Test + public void testHnBucketNonRecursiveDeleteOperation() throws Exception { + String bucketName = this.gcsiHelper.getUniqueBucketName("hn"); + GoogleHadoopFileSystem googleHadoopFileSystem = createHnEnabledBucket(bucketName); + String bucketPath = "gs://" + bucketName; + try { + googleHadoopFileSystem.mkdirs(new Path("/A/")); + assertThrows( + "Cannot delete a non-empty directory", + java.nio.file.DirectoryNotEmptyException.class, + () -> googleHadoopFileSystem.delete(new Path(bucketPath), false)); + + // verify only "A/" folder exists + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(1); + + // delete A/ non recursively + googleHadoopFileSystem.delete(new Path(bucketPath + "/A"), false); + + // check that on listing we get no folders for folder "A/" + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(0); + } finally { + googleHadoopFileSystem.delete(new Path(bucketPath)); + } + } + + @Test + public void testHnBucketRecursiveDeleteOperationOnBucket() throws Exception { + String bucketName = this.gcsiHelper.getUniqueBucketName("hn"); + String bucketPath = "gs://" + bucketName; + GoogleHadoopFileSystem googleHadoopFileSystem = createHnEnabledBucket(bucketName); + createResources(googleHadoopFileSystem); + assertThat(getSubFolderCount(googleHadoopFileSystem, "gs://" + bucketName + "/")).isEqualTo(22); + assertThrows( + "Cannot delete a non-empty directory", + java.nio.file.DirectoryNotEmptyException.class, + () -> googleHadoopFileSystem.delete(new Path(bucketPath), false)); + + // delete bucket + googleHadoopFileSystem.delete(new Path(bucketPath), true); + 
assertThat( + googleHadoopFileSystem + .getGcsFs() + .getGcs() + .getItemInfo(new StorageResourceId(bucketName)) + .exists()) + .isFalse(); + + assertThrows( + "The specified bucket does not exist : " + bucketPath, + com.google.api.gax.rpc.NotFoundException.class, + () -> assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath)).isEqualTo(0)); + } + + @Test + public void testHnBucketRecursiveDeleteOperationOnDirectory() throws Exception { + String bucketName = this.gcsiHelper.getUniqueBucketName("hn"); + String bucketPath = "gs://" + bucketName; + GoogleHadoopFileSystem googleHadoopFileSystem = createHnEnabledBucket(bucketName); + try { + createResources(googleHadoopFileSystem); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(21); + assertThrows( + "Cannot delete a non-empty directory", + java.nio.file.DirectoryNotEmptyException.class, + () -> googleHadoopFileSystem.delete(new Path(bucketPath + "/A"), false)); + + // rename A/ to B/ + googleHadoopFileSystem.rename(new Path(bucketPath + "/A/"), new Path(bucketPath + "/B/")); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/B/")).isEqualTo(21); + + // delete B/ + googleHadoopFileSystem.delete(new Path("/B"), true); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/B/")).isEqualTo(0); + + // rename C/ to B/ + googleHadoopFileSystem.rename(new Path(bucketPath + "/C/"), new Path(bucketPath + "/B/")); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/B/")).isEqualTo(1); + } finally { + googleHadoopFileSystem.delete(new Path(bucketPath)); + } + } + + @Test + public void testHnBucketDeleteOperationOnNonExistingFolder() throws Exception { + String bucketName = this.gcsiHelper.getUniqueBucketName("hn"); + String bucketPath = "gs://" + bucketName; + GoogleHadoopFileSystem googleHadoopFileSystem = createHnEnabledBucket(bucketName); + + try { + googleHadoopFileSystem.mkdirs(new Path("/A/")); + 
googleHadoopFileSystem.mkdirs(new Path("/A/C/")); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(2); + + assertThrows( + "Cannot delete a non-empty directory", + java.nio.file.DirectoryNotEmptyException.class, + () -> googleHadoopFileSystem.delete(new Path(bucketPath + "/A"), false)); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(2); + + // try to delete a non existing folder + List folderInfoList = new ArrayList<>(); + folderInfoList.add(new FolderInfo(FolderInfo.createFolderInfoObject(bucketName, "A/"))); + folderInfoList.add(new FolderInfo(FolderInfo.createFolderInfoObject(bucketName, "A/B/"))); + assertThrows( + "The folder you tried to delete is not empty.", + java.io.IOException.class, + () -> googleHadoopFileSystem.getGcsFs().getGcs().deleteFolders(folderInfoList)); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(2); + + // delete A/ + googleHadoopFileSystem.delete(new Path("/A"), true); + assertThat(getSubFolderCount(googleHadoopFileSystem, bucketPath + "/A/")).isEqualTo(0); + } finally { + googleHadoopFileSystem.delete(new Path(bucketPath)); + } + } + + private void createFile(GoogleHadoopFileSystem googleHadoopFileSystem, Path path) + throws Exception { + try (FSDataOutputStream fout = googleHadoopFileSystem.create(path)) { + fout.writeBytes("data"); + } + } + + private GoogleHadoopFileSystem createHnEnabledBucket(String bucketName) throws Exception { + GoogleHadoopFileSystem googleHadoopFileSystem = new GoogleHadoopFileSystem(); + URI initUri = new URI("gs://" + bucketName); + Configuration config = loadConfig(); + config.setBoolean("fs.gs.hierarchical.namespace.folders.enable", true); + googleHadoopFileSystem.initialize(initUri, config); + GoogleCloudStorage theGcs = googleHadoopFileSystem.getGcsFs().getGcs(); + theGcs.createBucket( + bucketName, CreateBucketOptions.builder().setHierarchicalNamespaceEnabled(true).build()); + 
assertThat(theGcs.isHnBucket(new Path(initUri + "/").toUri())).isTrue(); + return googleHadoopFileSystem; + } + + /** Pathlocation should end with "/" prefix */ + private Integer getSubFolderCount( + GoogleHadoopFileSystem googleHadoopFileSystem, String pathLocation) + throws IOException, URISyntaxException { + List initialListOfFolders = + googleHadoopFileSystem + .getGcsFs() + .listFoldersInfoForPrefixPage( + new URI(pathLocation), ListFolderOptions.builder().build(), null) + .getItems(); + return initialListOfFolders.size(); + } + + private void createResources(GoogleHadoopFileSystem googleHadoopFileSystem) throws Exception { + googleHadoopFileSystem.mkdirs(new Path("A/")); + googleHadoopFileSystem.mkdirs(new Path("A/dir1/")); + googleHadoopFileSystem.mkdirs(new Path("A/dir2/")); + for (int i = 0; i < 15; i++) { + Random r = new Random(); + googleHadoopFileSystem.mkdirs(new Path("A/dir1/" + r.nextInt() + "/")); + } + googleHadoopFileSystem.mkdirs(new Path("A/dir1/subdir1/")); + googleHadoopFileSystem.mkdirs(new Path("A/dir1/subdir2/")); + googleHadoopFileSystem.mkdirs(new Path("A/dir2/subdir3/")); + createFile(googleHadoopFileSystem, new Path("A/1")); + createFile(googleHadoopFileSystem, new Path("A/2")); + + googleHadoopFileSystem.mkdirs(new Path("C/")); + createFile(googleHadoopFileSystem, new Path("C/1")); + createFile(googleHadoopFileSystem, new Path("C/2")); + createFile(googleHadoopFileSystem, new Path("6")); + } + + private void checkMetric( + String name, StorageStatistics statistics, HashSet metricNames, String statsString) { + assertThat(metricNames.contains(name)).isTrue(); + assertThat(statistics.isTracked(name)).isTrue(); + assertThat(statsString.contains(name + "=")).isTrue(); + assertEquals(0, statistics.getLong(name).longValue()); + } + private static Long getMetricValue(StorageStatistics stats, GhfsStatistic invocationCreate) { return stats.getLong(invocationCreate.getSymbol()); } diff --git 
  // The four overrides below replace inherited HN-bucket delete tests with empty bodies, so
  // those tests do nothing when run from this class.
  // NOTE(review): presumably they are disabled because they need a real HN enabled bucket,
  // which the in-memory storage used by this test class does not provide — confirm.
  @Override
  public void testHnBucketRecursiveDeleteOperationOnDirectory() {}

  @Override
  public void testHnBucketRecursiveDeleteOperationOnBucket() {}

  @Override
  public void testHnBucketNonRecursiveDeleteOperation() {}

  @Override
  public void testHnBucketDeleteOperationOnNonExistingFolder() {}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.hadoop.gcsio; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.cloud.hadoop.util.ApiErrorExtractor; +import com.google.cloud.hadoop.util.ErrorTypeExtractor; +import com.google.cloud.hadoop.util.ErrorTypeExtractor.ErrorType; +import com.google.cloud.hadoop.util.GoogleCloudStorageEventBus; +import com.google.cloud.hadoop.util.GrpcErrorTypeExtractor; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import com.google.common.flogger.GoogleLogger; +import com.google.common.util.concurrent.FutureCallback; +import com.google.storage.control.v2.DeleteFolderRequest; +import com.google.storage.control.v2.StorageControlClient; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentHashMap.KeySetView; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; + +@VisibleForTesting +class DeleteFolderOperation { + private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); + + // Maximum number of times to retry deletes in the case of precondition failures. + private static final int MAXIMUM_PRECONDITION_FAILURES_IN_DELETE = 4; + private static final ApiErrorExtractor errorExtractor = ApiErrorExtractor.INSTANCE; + + // Error extractor to map APi exception to meaningful ErrorTypes. 
+ private static final ErrorTypeExtractor errorTypeExtractor = GrpcErrorTypeExtractor.INSTANCE; + private final GoogleCloudStorageOptions storageOptions; + private final KeySetView allExceptions; + private final List folders; + private final BatchExecutor batchExecutor; + private final StorageControlClient storageControlClient; + private final BlockingQueue folderDeleteBlockingQueue; + private final ConcurrentHashMap countOfChildren; + + DeleteFolderOperation( + List folders, + GoogleCloudStorageOptions storageOptions, + StorageControlClient storageControlClient) { + this.folders = folders; + this.storageOptions = storageOptions; + this.storageControlClient = storageControlClient; + this.folderDeleteBlockingQueue = new LinkedBlockingQueue<>(folders.size()); + + // threads for parallel delete of folder resources + this.batchExecutor = new BatchExecutor(this.storageOptions.getBatchThreads()); + + // Gather exceptions to wrap in a composite exception at the end. + this.allExceptions = ConcurrentHashMap.newKeySet(); + + // Map to store number of children for each parent object + this.countOfChildren = new ConcurrentHashMap<>(); + } + + /** Helper function that performs the deletion process for folder resources */ + public void performDeleteOperation() throws InterruptedException { + int folderSize = folders.size(); + computeChildrenForFolderResource(); + + // this will avoid infinite loop when all folders are deleted + while (folderSize != 0 && encounteredNoExceptions()) { + FolderInfo folderToDelete = getElementFromBlockingQueue(); + folderSize--; + + // Queue the deletion request + queueSingleFolderDelete(folderToDelete, /* attempt */ 1); + } + batchExecutorShutdown(); + } + + /** Shutting down batch executor and flushing any remaining requests */ + private void batchExecutorShutdown() { + try { + batchExecutor.shutdown(); + } catch (IOException e) { + addException( + new IOException( + String.format("Error in shutting down batch executor : %s", e.getMessage()))); + 
} + } + + public boolean encounteredNoExceptions() { + return allExceptions.isEmpty(); + } + + public KeySetView getAllExceptions() { + return allExceptions; + } + + /** Gets the head from the blocking queue */ + public FolderInfo getElementFromBlockingQueue() throws InterruptedException { + try { + return folderDeleteBlockingQueue.poll(1, TimeUnit.MINUTES); + } catch (InterruptedException e) { + logger.atSevere().log( + "Encountered exception while getting an element from queue in HN enabled bucket : %s", e); + throw e; + } + } + + /** Adding to batch executor's queue */ + public void addToToBatchExecutorQueue(Callable callable, FutureCallback callback) { + batchExecutor.queue(callable, callback); + } + + /** Computes the number of children for each folder resource */ + public void computeChildrenForFolderResource() { + for (FolderInfo currentFolder : folders) { + if (!countOfChildren.containsKey(currentFolder.getFolderName())) { + countOfChildren.put(currentFolder.getFolderName(), 0L); + } + + String parentFolder = currentFolder.getParentFolderName(); + if (!Strings.isNullOrEmpty(parentFolder)) { + countOfChildren.merge(parentFolder, 1L, (oldValue, newValue) -> oldValue + newValue); + } + } + // Add leaf folders to blocking queue + for (FolderInfo currentFolder : folders) { + if (countOfChildren.get(currentFolder.getFolderName()) == 0L) { + addFolderResourceInBlockingQueue(currentFolder); + } + } + } + + /** + * Helper function to add the parent of successfully deleted folder resource into the blocking + * queue + * + * @param folderResource of the folder that is now deleted + */ + protected synchronized void successfullDeletionOfFolderResource(FolderInfo folderResource) { + // remove the folderResource from list of map + countOfChildren.remove(folderResource.getFolderName()); + + String parentFolder = folderResource.getParentFolderName(); + if (countOfChildren.containsKey(parentFolder)) { + + // update the parent's count of children + 
countOfChildren.replace(parentFolder, countOfChildren.get(parentFolder) - 1); + + // if the parent folder is now empty, append in the queue + if (countOfChildren.get(parentFolder) == 0) { + addFolderResourceInBlockingQueue( + new FolderInfo( + FolderInfo.createFolderInfoObject(folderResource.getBucket(), parentFolder))); + } + } + } + + /** Helper function to delete a single folder resource */ + protected void queueSingleFolderDelete(@Nonnull final FolderInfo folder, final int attempt) { + final String bucketName = folder.getBucket(); + final String folderName = folder.getFolderName(); + checkArgument( + !Strings.isNullOrEmpty(bucketName), + String.format("No bucket for folder resource %s", bucketName)); + checkArgument( + !Strings.isNullOrEmpty(folderName), + String.format("No folder path for folder resource %s", folderName)); + + addToToBatchExecutorQueue( + new DeleteFolderRequestCallable(folder, storageControlClient), + getDeletionCallback(folder, allExceptions, attempt)); + } + + /** + * Helper function to add folderResource to blocking queue + * + * @param folderResource + */ + private void addFolderResourceInBlockingQueue(FolderInfo folderResource) { + folderDeleteBlockingQueue.add(folderResource); + } + + /** Helper to create a callback for a particular deletion request for folder. */ + private FutureCallback getDeletionCallback( + final FolderInfo resourceId, + final KeySetView allExceptions, + final int attempt) { + return new FutureCallback() { + @Override + public void onSuccess(Void result) { + logger.atFiner().log("Successfully deleted folder %s", resourceId.toString()); + successfullDeletionOfFolderResource(resourceId); + } + + @Override + public void onFailure(Throwable throwable) { + if (isErrorType(throwable, ErrorType.NOT_FOUND)) { + // Ignore item-not-found errors. We do not have to delete what we cannot find. 
+ // This + // error typically shows up when we make a request to delete something and the + // server + // receives the request but we get a retry-able error before we get a response. + // During a retry, we no longer find the item because the server had deleted + // it already. + logger.atFiner().log( + "Delete folder '%s' not found: %s", resourceId, throwable.getMessage()); + successfullDeletionOfFolderResource(resourceId); + } else if (isErrorType(throwable, ErrorType.FAILED_PRECONDITION) + && attempt <= MAXIMUM_PRECONDITION_FAILURES_IN_DELETE) { + logger.atInfo().log( + "Precondition not met while deleting '%s'. Attempt %s." + " Retrying:%s", + resourceId, attempt, throwable); + queueSingleFolderDelete(resourceId, attempt + 1); + } else { + GoogleCloudStorageEventBus.postOnException(); + addException( + new IOException( + String.format("Error deleting '%s', stage 2", resourceId), throwable)); + } + } + }; + } + + private boolean isErrorType(Throwable throwable, ErrorType errorType) { + return throwable instanceof Exception + && (errorTypeExtractor.getErrorType((Exception) throwable) == errorType); + } + + private synchronized void addException(IOException e) { + allExceptions.add(e); + } + + /* Callable class specifically for deletion of folder resource */ + private class DeleteFolderRequestCallable implements Callable { + private StorageControlClient storageControlClient; + private DeleteFolderRequest deleteFolderRequest; + + @Override + public Void call() { + storageControlClient.deleteFolder(deleteFolderRequest); + return null; + } + + DeleteFolderRequestCallable(FolderInfo folder, StorageControlClient storageControlClient) { + checkArgument(storageControlClient != null, "StorageControlClient cannot be null"); + this.storageControlClient = storageControlClient; + this.deleteFolderRequest = + DeleteFolderRequest.newBuilder().setName(folder.toString()).build(); + } + } +} diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/FolderInfo.java 
b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/FolderInfo.java new file mode 100644 index 0000000000..4df363a207 --- /dev/null +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/FolderInfo.java @@ -0,0 +1,134 @@ +/* + * Copyright 2024 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.hadoop.gcsio; + +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import com.google.storage.control.v2.Folder; +import javax.annotation.Nonnull; + +@VisibleForTesting +/** Contains information about a Folder resource and is applicable for only HN enabled bucket */ +public class FolderInfo { + public static final String BUCKET_PREFIX = "projects/_/buckets/"; + public static final String FOLDER_PREFIX = "/folders/"; + public static final String PATH = "/"; + + private final String bucket; + + private final String folderName; + + /** + * Constructs an instance of FolderInfo. + * + * @param folder Information about the underlying folder. 
+ */ + public FolderInfo(@Nonnull Folder folder) { + checkState( + !Strings.isNullOrEmpty(folder.getName()), + "Folder resource has invalid path : %s", + folder.getName()); + this.bucket = getBucketString(folder.getName()); + this.folderName = getFolderString(folder.getName()); + } + + /** + * Returns the folder object with provided bucket and path + * + * @param bucketName + * @param folderName + * @return FolderInfo object + */ + public static Folder createFolderInfoObject(String bucketName, String folderName) { + checkState( + !Strings.isNullOrEmpty(bucketName), + "Folder resource has invalid bucket name: %s", + bucketName); + checkState(folderName != null, "Folder resource has invalid folder name: %s", folderName); + + // Add "/" suffix only if foldername is not empty and does not end with "/" + String suffix = (folderName.equals("") ? "" : (folderName.endsWith(PATH) ? "" : PATH)); + return Folder.newBuilder() + .setName(String.join("", BUCKET_PREFIX, bucketName, FOLDER_PREFIX, folderName, suffix)) + .build(); + } + + /** + * Returns the bucket string. Eg : /projects/_/buckets/BUCKET_NAME/folders/FOLDER_NAME is the + * template of path, then bucket string will be BUCKET_NAME + * + * @param path + * @return bucket string + */ + private String getBucketString(String path) { + checkState( + path.startsWith(BUCKET_PREFIX), + "Invalid bucket resource name. Bucket resource name must begin with 'projects/_/buckets/' for global-namespaced buckets and contain no invalid characters or patterns : %s", + path); + int startIndexOfBucketPrefix = path.indexOf(BUCKET_PREFIX) + BUCKET_PREFIX.length(); + return path.substring(startIndexOfBucketPrefix, path.indexOf(PATH, startIndexOfBucketPrefix)); + } + + /** + * Returns the Folder string. 
Eg : /projects/_/buckets/BUCKET_NAME/folders/FOLDER_NAME is the + * template of path, then folder string will be FOLDER_NAME eg : + * /projects/_/buckets/BUCKET_NAME/folders/A/B/ -> returns A/B/ eg : + * /projects/_/buckets/BUCKET_NAME/folders/ -> returns "" + * + *

Since this method is always called after createFolderInfoObject() method, "/" suffix is + * already taken care of. + * + * @param path + * @return + */ + private String getFolderString(String path) { + checkState(path.contains(FOLDER_PREFIX), "Invalid folder path: %s", path); + int startIndex = path.indexOf(FOLDER_PREFIX) + FOLDER_PREFIX.length(); + return path.substring(startIndex); + } + + /** Gets the path of this file or directory. */ + public String getBucket() { + return bucket; + } + + /** Returns the folder name, ie path excluding the bucket name */ + public String getFolderName() { + return folderName; + } + + public boolean isBucket() { + return folderName.equals(""); + } + + /** + * Returns the parent folder name if exists, else returns empty string + * + * @return parent folderName + */ + public String getParentFolderName() { + int lastIndex = folderName.lastIndexOf(PATH, folderName.length() - 2); + return folderName.substring(0, lastIndex + 1); + } + + /** Gets string representation of this instance. 
*/ + public String toString() { + return String.join("", BUCKET_PREFIX, bucket, FOLDER_PREFIX, folderName); + } +} diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java index 52aa26f67f..47ee342e50 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java @@ -126,6 +126,12 @@ public Map getStatistics() { return delegate.getStatistics(); } + @Override + public void deleteFolders(List folders) throws IOException { + logger.atFiner().log("%s.deleteFolders(%s)", delegateClassName, folders); + delegate.deleteFolders(folders); + } + @Override public void copy( String srcBucketName, @@ -188,6 +194,17 @@ public ListPage listObjectInfoPage( return delegate.listObjectInfoPage(bucketName, objectNamePrefix, listOptions, pageToken); } + @Override + public ListPage listFolderInfoForPrefixPage( + String bucketName, + String folderNamePrefix, + ListFolderOptions listFolderOptions, + String pageToken) + throws IOException { + return delegate.listFolderInfoForPrefixPage( + bucketName, folderNamePrefix, listFolderOptions, pageToken); + } + @Override public GoogleCloudStorageItemInfo getItemInfo(StorageResourceId resourceId) throws IOException { logger.atFiner().log("%s.getItemInfo(%s)", delegateClassName, resourceId); diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java index acf96fb021..117b7a80ce 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java @@ -18,6 +18,7 @@ import static com.google.common.base.Preconditions.checkArgument; +import com.google.api.services.storage.model.Folder; import 
  /**
   * Returns a page of the folder resources whose names start with the given prefix. Applicable
   * only for HN enabled bucket.
   *
   * @param bucketName bucket name
   * @param folderNamePrefix folder resource name prefix
   * @param listFolderOptions options to use when listing folder resources
   * @param pageToken the page token
   * @return {@link ListPage} holding the listed folder resources as {@link FolderInfo} items
   * @throws IOException on IO error
   */
  ListPage listFolderInfoForPrefixPage(
      String bucketName,
      String folderNamePrefix,
      ListFolderOptions listFolderOptions,
      String pageToken)
      throws IOException;
  /**
   * Returns a page of the folder resources that match the given prefix.
   *
   * @param prefix the prefix to use to list all matching folder resources.
   * @param listFolderOptions options to use when listing the folder resources
   * @param pageToken the page token to list
   * @return page of the folder resources matching the prefix
   * @throws IOException if the listing fails
   */
  ListPage listFoldersInfoForPrefixPage(
      URI prefix, ListFolderOptions listFolderOptions, String pageToken) throws IOException;
* diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java index aca7cdb6e7..ffba05ef46 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java @@ -38,6 +38,7 @@ import com.google.cloud.hadoop.util.ThreadTrace; import com.google.cloud.hadoop.util.TraceOperation; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -51,8 +52,10 @@ import java.nio.file.DirectoryNotEmptyException; import java.nio.file.FileAlreadyExistsException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -68,6 +71,7 @@ import java.util.function.Function; import java.util.regex.Pattern; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; /** Provides FS semantics over GCS based on Objects API */ @@ -327,6 +331,9 @@ public void delete(URI path, boolean recursive) throws IOException { () -> getFileInfoInternal(parentId, /* inferImplicitDirectories= */ false)); } + boolean isHnBucket = + (this.options.getCloudStorageOptions().isHnBucketRenameEnabled() && gcs.isHnBucket(path)); + List listOfFolders = new LinkedList<>(); List itemsToDelete; // Delete sub-items if it is a directory. 
if (fileInfo.isDirectory()) { @@ -338,6 +345,32 @@ public void delete(URI path, boolean recursive) throws IOException { : listFileInfoForPrefixPage( fileInfo.getPath(), DELETE_RENAME_LIST_OPTIONS, /* pageToken= */ null) .getItems(); + + /*TODO : making listing of folder and object resources in parallel*/ + if (isHnBucket) { + /** + * Get list of folders if the bucket is HN enabled bucket For recursive delete, get all + * folder resources. For non-recursive delete, delete the folder directly if it is a + * directory and do not do anything if the path points to a bucket. + */ + String bucketName = getBucketName(path); + String folderName = getFolderName(path); + listOfFolders = + recursive + ? listFoldersInfoForPrefixPage( + fileInfo.getPath(), ListFolderOptions.DEFAULT, /* pageToken= */ null) + .getItems() + // will not delete for a bucket + : (folderName.equals("") + ? new LinkedList<>() + : Arrays.asList( + new FolderInfo(FolderInfo.createFolderInfoObject(bucketName, folderName)))); + + logger.atFiner().log( + "Encountered HN enabled bucket with %s number of folder in path : %s", + listOfFolders.size(), path); + } + if (!itemsToDelete.isEmpty() && !recursive) { GoogleCloudStorageEventBus.postOnException(); throw new DirectoryNotEmptyException("Cannot delete a non-empty directory."); @@ -350,21 +383,92 @@ public void delete(URI path, boolean recursive) throws IOException { List bucketsToDelete = new ArrayList<>(); (fileInfo.getItemInfo().isBucket() ? 
bucketsToDelete : itemsToDelete).add(fileInfo); - deleteInternal(itemsToDelete, bucketsToDelete); + deleteInternalWithFolders(itemsToDelete, listOfFolders, bucketsToDelete); repairImplicitDirectory(parentInfoFuture); } + /** + * Returns the bucket name, taken from the authority component of the given URI. + * + * @param path URI whose authority must be non-null and non-empty (enforced with checkState) + * @return the authority of {@code path} + */ + private String getBucketName(@Nonnull URI path) { + checkState( + !Strings.isNullOrEmpty(path.getAuthority()), "Bucket name cannot be null : %s", path); + return path.getAuthority(); + } + + /** + * Returns the path component of the given URI without its first character. + * + * @param path URI whose path must start with PATH_DELIMITER (enforced with checkState) + * @return the remaining path; empty when the path holds nothing after its first character + */ + private String getFolderName(@Nonnull URI path) { + checkState( + path.getPath().startsWith(PATH_DELIMITER), "Invalid folder name: %s", path.getPath()); + return path.getPath().substring(1); + } + + /** + * Returns the list of folder resources in the prefix. It lists all the folder resources. + * + * @param prefix the prefix to use to list all matching folder resources. + * @param listFolderOptions the options passed through to the gcs folder listing call + * @param pageToken the page token to list + */ + public ListPage listFoldersInfoForPrefixPage( + URI prefix, ListFolderOptions listFolderOptions, String pageToken) throws IOException { + logger.atFiner().log( + "listFoldersInfoForPrefixPage(prefix: %s, pageToken:%s)", prefix, pageToken); + StorageResourceId prefixId = getPrefixId(prefix); + return gcs.listFolderInfoForPrefixPage( + prefixId.getBucketName(), prefixId.getObjectName(), listFolderOptions, pageToken); + } + + /** + * Deletes the given folder resources; returns without calling gcs when the list is empty. + * + * @param listOfFolders the folder resources to delete + * @throws IOException declared for the delegated gcs.deleteFolders call + */ + private void deleteFolders(@Nonnull List listOfFolders) throws IOException { + if (listOfFolders.isEmpty()) return; + logger.atFiner().log( + "deleteFolder(listOfFolders: %s, size:%s)", listOfFolders, listOfFolders.size()); + gcs.deleteFolders(listOfFolders); + } + + /** Deletes all objects in the given path list followed by all bucket items. 
*/ + /** Deletes all items in the given path list followed by all bucket items. */ private void deleteInternal(List itemsToDelete, List bucketsToDelete) throws IOException { + + deleteObjects(itemsToDelete); + deleteBucket(bucketsToDelete); + } + + /** Deletes all objects, then all folders, then all buckets, in that order. */ + private void deleteInternalWithFolders( + List itemsToDelete, List listOfFolders, List bucketsToDelete) + throws IOException { + deleteObjects(itemsToDelete); + deleteFolders(listOfFolders); + deleteBucket(bucketsToDelete); + } + + /** Helper function to delete objects */ + private void deleteObjects(List itemsToDelete) throws IOException { // TODO(user): We might need to separate out children into separate batches from parents to // avoid deleting a parent before somehow failing to delete a child. // Delete children before their parents. // // Note: we modify the input list, which is ok for current usage. - // We should make a copy in case that changes in the future. + // We should make a copy in case that changes in future. 
itemsToDelete.sort(FILE_INFO_PATH_COMPARATOR.reversed()); if (!itemsToDelete.isEmpty()) { @@ -381,7 +485,10 @@ private void deleteInternal(List itemsToDelete, List buckets } gcs.deleteObjects(objectsToDelete); } + } + /** Helper function to delete buckets */ + private void deleteBucket(List bucketsToDelete) throws IOException { if (!bucketsToDelete.isEmpty()) { List bucketNames = new ArrayList<>(bucketsToDelete.size()); for (FileInfo bucketInfo : bucketsToDelete) { diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java index 71e5401189..77656d99f0 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java @@ -16,6 +16,7 @@ package com.google.cloud.hadoop.gcsio; +import static com.google.cloud.hadoop.gcsio.FolderInfo.BUCKET_PREFIX; import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions.createFileNotFoundException; import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions.createJsonResponseException; import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo.createInferredDirectory; @@ -67,6 +68,7 @@ import com.google.cloud.hadoop.util.RetryHttpInitializer; import com.google.cloud.hadoop.util.TraceOperation; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; @@ -78,6 +80,7 @@ import com.google.common.io.BaseEncoding; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.storage.control.v2.*; +import com.google.storage.control.v2.StorageControlClient.ListFoldersPagedResponse; import java.io.FileNotFoundException; import java.io.IOException; import java.lang.reflect.Field; @@ -90,7 +93,9 @@ import 
java.util.Collection; import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; @@ -783,6 +788,30 @@ public void deleteObjects(List fullObjectNames) throws IOExce } } + /** See {@link GoogleCloudStorage#deleteFolders(List)} for details about expected behavior. */ + @Override + public void deleteFolders(List folders) throws IOException { + String traceContext = String.format("batchFolderDelete(size=%s)", folders.size()); + DeleteFolderOperation deleteFolderOperation = + new DeleteFolderOperation(folders, storageOptions, lazyGetStorageControlClient()); + try (ITraceOperation to = TraceOperation.addToExistingTrace(traceContext)) { + deleteFolderOperation.performDeleteOperation(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException( + String.format( + "Recieved thread interruption exception while deletion of folder resource : %s", + e.getMessage()), + e); + } + + if (!deleteFolderOperation.encounteredNoExceptions()) { + GoogleCloudStorageEventBus.postOnException(); + throw GoogleCloudStorageExceptions.createCompositeException( + deleteFolderOperation.getAllExceptions()); + } + } + /** Helper to create a callback for a particular deletion request. 
*/ private JsonBatchCallback getDeletionCallback( StorageResourceId resourceId, @@ -1574,6 +1603,80 @@ public ListPage listObjectInfoPage( return new ListPage<>(objectInfos, nextPageToken); } + /** + * @see GoogleCloudStorage#listFolderInfoForPrefixPage(String, String, ListFolderOptions, String) + */ + @Override + public ListPage listFolderInfoForPrefixPage( + String bucketName, + String objectNamePrefix, + ListFolderOptions listFolderOptions, + String pageToken) + throws IOException { + logger.atFiner().log( + "listFolderInfoForPrefixPage(%s, %s, %s, %s)", + bucketName, objectNamePrefix, listFolderOptions, pageToken); + + ListFoldersRequest.Builder listFoldersRequest = + createFolderListRequest(bucketName, objectNamePrefix, listFolderOptions, pageToken); + + if (!isNullOrEmpty(pageToken)) { + logger.atFiner().log("listFolderInfoForPrefixPage: next page %s", pageToken); + listFoldersRequest.setPageToken(pageToken); + } + + List listedFolders = new LinkedList<>(); + String nextPageToken = + listStorageFoldersAndPrefixesPage(listFoldersRequest.build(), listedFolders); + while (!isNullOrEmpty(nextPageToken)) { + nextPageToken = + listStorageFoldersAndPrefixesPage( + listFoldersRequest.setPageToken(nextPageToken).build(), listedFolders); + } + + return new ListPage<>(listedFolders, nextPageToken); + } + + private ListFoldersRequest.Builder createFolderListRequest( + String bucketName, + String objectNamePrefix, + ListFolderOptions listFolderOptions, + String pageToken) { + logger.atFiner().log( + "createListFolderRequest(%s, %s, %s, %s)", + bucketName, objectNamePrefix, listFolderOptions, pageToken); + checkArgument(!isNullOrEmpty(bucketName), "bucketName must not be null or empty"); + + ListFoldersRequest.Builder request = + ListFoldersRequest.newBuilder() + .setPageSize(listFolderOptions.getPageSize()) + .setParent(BUCKET_PREFIX + bucketName); + + if (!Strings.isNullOrEmpty(objectNamePrefix)) { + request.setPrefix(objectNamePrefix); + } + return request; + } + + 
private String listStorageFoldersAndPrefixesPage( + ListFoldersRequest listFoldersRequest, List listedFolder) throws IOException { + checkNotNull(listedFolder, "Must provide a non-null container for listedFolder."); + + ListFoldersPagedResponse listFolderRespose = + storageControlClient.listFolders(listFoldersRequest); + try (ITraceOperation op = TraceOperation.addToExistingTrace("gcs.folders.list")) { + Iterator itemsIterator = listFolderRespose.getPage().getValues().iterator(); + while (itemsIterator.hasNext()) { + listedFolder.add(new FolderInfo(itemsIterator.next())); + } + op.annotate("resultSize", itemsIterator == null ? 0 : listedFolder.size()); + } + + logger.atFiner().log( + "listFolders(%s): listed %d objects", listFoldersRequest, listedFolder.size()); + return listFolderRespose.getNextPageToken(); + } + private List getGoogleCloudStorageItemInfos( String bucketName, String objectNamePrefix, diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListFolderOptions.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListFolderOptions.java new file mode 100644 index 0000000000..59b7f8f025 --- /dev/null +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListFolderOptions.java @@ -0,0 +1,46 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.hadoop.gcsio; + +import com.google.auto.value.AutoValue; + +/** + * Options that can be specified when listing folders in the {@link GoogleCloudStorage}. They are + * valid only for HN enabled buckets. + */ +@AutoValue +public abstract class ListFolderOptions { + + /** List all folders in the directory. */ + public static final ListFolderOptions DEFAULT = builder().build(); + + public static Builder builder() { + return new AutoValue_ListFolderOptions.Builder().setPageSize(5000); + } + + public abstract Builder toBuilder(); + + /** Maximum folder resources in a single page */ + public abstract int getPageSize(); + + /** Builder for {@link ListFolderOptions} */ + @AutoValue.Builder + public abstract static class Builder { + + public abstract Builder setPageSize(int pageSize); + + public abstract ListFolderOptions build(); + } +} diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java index 379715709f..12538285fb 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java @@ -23,6 +23,7 @@ import com.google.api.client.util.Clock; import com.google.cloud.hadoop.gcsio.CreateBucketOptions; import com.google.cloud.hadoop.gcsio.CreateObjectOptions; +import com.google.cloud.hadoop.gcsio.FolderInfo; import com.google.cloud.hadoop.gcsio.GoogleCloudStorage; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl; @@ -30,6 +31,7 @@ import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStrings; +import com.google.cloud.hadoop.gcsio.ListFolderOptions; import 
com.google.cloud.hadoop.gcsio.ListObjectOptions; import com.google.cloud.hadoop.gcsio.StorageResourceId; import com.google.cloud.hadoop.gcsio.UpdatableItemInfo; @@ -319,6 +321,11 @@ public synchronized void deleteObjects(List fullObjectNames) } } + @Override + public void deleteFolders(List folders) throws IOException { + throw new IOException("Not implemented"); + } + @Override public synchronized void copy( String srcBucketName, @@ -405,6 +412,16 @@ public ListPage listObjectInfoPage( listObjectInfo(bucketName, objectNamePrefix, listOptions), /* nextPageToken= */ null); } + @Override + public ListPage listFolderInfoForPrefixPage( + String bucketName, + String objectNamePrefix, + ListFolderOptions listFolderOptions, + String pageToken) + throws IOException { + throw new IOException("Not implemented"); + } + @Override public synchronized List listObjectInfo( String bucketName, String objectNamePrefix, ListObjectOptions listOptions) diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/DeleteFolderOperationTest.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/DeleteFolderOperationTest.java new file mode 100644 index 0000000000..dee9bc0677 --- /dev/null +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/DeleteFolderOperationTest.java @@ -0,0 +1,148 @@ +/* + * Copyright 2024 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.hadoop.gcsio; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.common.base.Strings; +import com.google.common.util.concurrent.FutureCallback; +import com.google.storage.control.v2.StorageControlClient; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.Random; +import org.junit.Test; + +public class DeleteFolderOperationTest { + + private static final String BUCKET_NAME = "foo-bucket"; + + @Test + public void checkDeletionOrderForHnBucketBalancedFolders() throws InterruptedException { + String folderString = "test-folder-start/"; + List foldersToDelete = new LinkedList<>(); + + addFolders(foldersToDelete, folderString); + CustomDeleteFolderOperationTest deleteFolderOperation = + new CustomDeleteFolderOperationTest( + foldersToDelete, GoogleCloudStorageOptions.DEFAULT, null); + + List orderOfDeletion = deleteFolderOperation.getOrderOfDeletion(); + deleteFolderOperation.performDeleteOperation(); + assertThat(orderOfDeletion.size()).isEqualTo(foldersToDelete.size()); + + // Map to store the index at which a folder was deleted + HashMap deletionOrder = new HashMap<>(); + for (int i = 0; i < orderOfDeletion.size(); i++) { + deletionOrder.put(orderOfDeletion.get(i).getFolderName(), i); + } + + for (int i = 0; i < orderOfDeletion.size(); i++) { + FolderInfo curFolder = orderOfDeletion.get(i); + String curFolderName = curFolder.getFolderName(); + String parentFolderName = curFolder.getParentFolderName(); + + if (!Strings.isNullOrEmpty(parentFolderName)) { + assertThat(deletionOrder.get(parentFolderName) > deletionOrder.get(curFolderName)).isTrue(); + } + } + } + + @Test + public void checkDeletionOrderForHnBucketSkewedFolders() throws InterruptedException { + String folderString = "test-folder-start/"; + List foldersToDelete = new LinkedList<>(); + + for (int i = 0; i < 10; i++) { + 
foldersToDelete.add( + new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, folderString))); + folderString += ("test-folder-" + i + "/"); + } + + CustomDeleteFolderOperationTest deleteFolderOperation = + new CustomDeleteFolderOperationTest( + foldersToDelete, GoogleCloudStorageOptions.DEFAULT, null); + + deleteFolderOperation.performDeleteOperation(); + List orderOfDeletion = deleteFolderOperation.getOrderOfDeletion(); + assertThat(orderOfDeletion.size()).isEqualTo(foldersToDelete.size()); + for (int i = 1; i < orderOfDeletion.size(); i++) { + FolderInfo prev = orderOfDeletion.get(i - 1); + FolderInfo cur = orderOfDeletion.get(i); + assertThat(prev.getParentFolderName()).isEqualTo(cur.getFolderName()); + } + } + + private void addFolders(List foldersToDelete, String curFolderName) { + Random r = new Random(); + Queue q = new ArrayDeque<>(); + q.add(curFolderName); + + while (!q.isEmpty()) { + String top = q.poll(); + foldersToDelete.add(new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, top))); + if (foldersToDelete.size() > 2000) return; + + for (int i = 0; i < 3; i++) { + long nextFolderName = r.nextInt(100000); + q.add(top + nextFolderName + "/"); + } + } + } + + /** Custom DeleteFolderOperation class to store order of folder deletion */ + private class CustomDeleteFolderOperationTest extends DeleteFolderOperation { + + /* Stores the order of deletion of folder resources*/ + private List orderOfDeletion; + + CustomDeleteFolderOperationTest( + List folders, + GoogleCloudStorageOptions storageOptions, + StorageControlClient storageControlClient) { + super(folders, storageOptions, storageControlClient); + this.orderOfDeletion = new ArrayList<>(folders.size()); + } + + public List getOrderOfDeletion() { + return orderOfDeletion; + } + + public void queueSingleFolderDelete(final FolderInfo folder, final int attempt) { + addToToBatchExecutorQueue(() -> null, getDeletionCallback(folder)); + } + + private synchronized void 
addToOrderOfDeletion(FolderInfo folderDeleted) { + orderOfDeletion.add(folderDeleted); + } + + protected FutureCallback getDeletionCallback(final FolderInfo resourceId) { + return new FutureCallback() { + @Override + public synchronized void onSuccess(Void result) { + addToOrderOfDeletion(resourceId); + successfullDeletionOfFolderResource(resourceId); + } + + @Override + public void onFailure(Throwable t) { + // do nothing + } + }; + } + } +} diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/FolderInfoTest.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/FolderInfoTest.java new file mode 100644 index 0000000000..dd71583b10 --- /dev/null +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/FolderInfoTest.java @@ -0,0 +1,100 @@ +/* + * Copyright 2024 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.hadoop.gcsio; + +import static com.google.cloud.hadoop.gcsio.FolderInfo.PATH; +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class FolderInfoTest { + + private final String BUCKET_NAME = "folder-info-test-bucket"; + private final String FOLDER_NAME = "test-parent-folder/test-folder-name"; + + @Test + public void checkForRootFolderWithNull() { + assertThrows( + "Folder resource has invalid bucket name", + IllegalStateException.class, + () -> new FolderInfo(FolderInfo.createFolderInfoObject(null, null))); + } + + @Test + public void checkForRootFolderWithEmptyString() { + assertThrows( + "Folder resource has invalid bucket name", + IllegalStateException.class, + () -> new FolderInfo(FolderInfo.createFolderInfoObject("", ""))); + } + + @Test + public void checkForBucketWithNullFolder() { + assertThrows( + "Folder resource has invalid folder name", + IllegalStateException.class, + () -> new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, null))); + } + + @Test + public void checkForBucketWithEmptyFolder() { + FolderInfo bucketFolderInfo = + new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, "")); + assertThat(bucketFolderInfo.getBucket()).isEqualTo(BUCKET_NAME); + assertThat(bucketFolderInfo.getFolderName()).isEqualTo(""); + assertThat(bucketFolderInfo.isBucket()).isTrue(); + assertThat(bucketFolderInfo.getParentFolderName()).isEqualTo(""); + } + + @Test + public void checkForNullBucket() { + assertThrows( + "Folder resource has invalid bucket name", + IllegalStateException.class, + () -> new FolderInfo(FolderInfo.createFolderInfoObject(null, FOLDER_NAME))); + } + + @Test + public void checkForEmptyBucket() { + assertThrows( + "Folder resource has invalid bucket name", + IllegalStateException.class, + () -> new 
FolderInfo(FolderInfo.createFolderInfoObject("", FOLDER_NAME))); + } + + @Test + public void checkForFolder() { + FolderInfo testFolderInfo = + new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, FOLDER_NAME)); + assertThat(testFolderInfo.getBucket()).isEqualTo(BUCKET_NAME); + assertThat(testFolderInfo.getFolderName()).isEqualTo(FOLDER_NAME + PATH); + assertThat(testFolderInfo.isBucket()).isFalse(); + assertThat(testFolderInfo.getParentFolderName()).isEqualTo("test-parent-folder/"); + } + + @Test + public void checkForFolderParent() { + FolderInfo testFolderInfo = + new FolderInfo(FolderInfo.createFolderInfoObject(BUCKET_NAME, "test-parent-folder")); + assertThat(testFolderInfo.getBucket()).isEqualTo(BUCKET_NAME); + assertThat(testFolderInfo.getFolderName()).isEqualTo("test-parent-folder" + PATH); + assertThat(testFolderInfo.isBucket()).isFalse(); + assertThat(testFolderInfo.getParentFolderName()).isEqualTo(""); + } +}