Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try harder to make believable directories #3775

Merged
merged 10 commits into from
Oct 10, 2018
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ public abstract class CloudStorageConfiguration {
public abstract boolean stripPrefixSlash();

/**
* Returns {@code true} if paths with a trailing slash should be treated as fake directories.
* Returns {@code true} if directories and paths with a trailing slash should be treated as fake
* directories.
*
* <p>With this feature, if file "foo/bar.txt" exists then both "foo" and "foo/" will be
* treated as if they were existing directories.
*/
public abstract boolean usePseudoDirectories();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.google.cloud.storage.StorageOptions;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Strings;
import com.google.common.collect.AbstractIterator;
import com.google.common.net.UrlEscapers;
import com.google.common.primitives.Ints;
Expand Down Expand Up @@ -332,7 +333,8 @@ private SeekableByteChannel newReadChannel(Path path, Set<? extends OpenOption>
}
}
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
// passing false since we just want to check if it ends with /
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(cloudPath);
}
return CloudStorageReadChannel.create(
Expand All @@ -348,7 +350,7 @@ private SeekableByteChannel newWriteChannel(Path path, Set<? extends OpenOption>
throws IOException {
initStorage();
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(cloudPath);
}
BlobId file = cloudPath.getBlobId();
Expand Down Expand Up @@ -439,7 +441,7 @@ public InputStream newInputStream(Path path, OpenOption... options) throws IOExc
public boolean deleteIfExists(Path path) throws IOException {
initStorage();
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
// if the "folder" is empty then we're fine, otherwise complain
// that we cannot act on folders.
try (DirectoryStream<Path> paths = Files.newDirectoryStream(path)) {
Expand Down Expand Up @@ -567,10 +569,13 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep
"File systems associated with paths don't agree on pseudo-directories.");
}
}
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
// We refuse to use paths that end in '/'. If the user puts a folder name
// but without the '/', they'll get whatever error GCS will return normally
// (if any).
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(fromPath);
}
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(toPath);
}

Expand Down Expand Up @@ -665,9 +670,6 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
while (true) {
try {
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
return;
}
boolean nullId;
if (isNullOrEmpty(userProject)) {
nullId = storage.get(
Expand All @@ -682,13 +684,26 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
== null;
}
if (nullId) {
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
// there is no such file, but we're not signalling error because the
// path is a pseudo-directory.
return;
}
throw new NoSuchFileException(path.toString());
}
break;
} catch (StorageException exs) {
// Will rethrow a StorageException if all retries/reopens are exhausted
retryHandler.handleStorageException(exs);
// we're being aggressive by retrying even on scenarios where we'd normally reopen.
} catch (IllegalArgumentException exs) {
if ( CloudStorageUtil.checkPath(path).seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
// there is no such file, but we're not signalling error because the
// path is a pseudo-directory.
return;
}
// Other cause for IAE, forward the exception.
throw exs;
}
}
}
Expand All @@ -708,19 +723,34 @@ public <A extends BasicFileAttributes> A readAttributes(
while (true) {
try {
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
BlobInfo blobInfo;
if (isNullOrEmpty(userProject)) {
blobInfo = storage.get(cloudPath.getBlobId());
} else {
blobInfo = storage.get(cloudPath.getBlobId(), BlobGetOption.userProject(userProject));
BlobInfo blobInfo = null;
try {
BlobId blobId = cloudPath.getBlobId();
// Null or empty name won't give us a file, so skip. But perhaps it's a pseudodir.
if (!isNullOrEmpty(blobId.getName())) {
if (isNullOrEmpty(userProject)) {
blobInfo = storage.get(blobId);
} else {
blobInfo = storage.get(blobId, BlobGetOption.userProject(userProject));
}
}
} catch (IllegalArgumentException ex) {
// the path may be invalid but look like a folder. In that case, return a folder.
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
// No? Propagate.
throw ex;
}
// null size indicate a file that we haven't closed yet, so GCS treats it as not there yet.
if ( null == blobInfo || blobInfo.getSize() == null ) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
throw new NoSuchFileException(
"gs://" + cloudPath.getBlobId().getBucket() + "/" + cloudPath.getBlobId().getName());
}
Expand Down Expand Up @@ -778,7 +808,13 @@ public DirectoryStream<Path> newDirectoryStream(Path dir, final Filter<? super P
// Loop will terminate via an exception if all retries are exhausted
while (true) {
try {
final String prefix = cloudPath.toRealPath().toString();
String prePrefix = cloudPath.normalize().toRealPath().toString();
// we can recognize paths without the final "/" as folders,
// but storage.list doesn't do the right thing with those, we need to append a "/".
if (!prePrefix.isEmpty() && !prePrefix.endsWith("/")) {
prePrefix += "/";
}
final String prefix = prePrefix;
Page<Blob> dirList;
if (isNullOrEmpty(userProject)) {
dirList = storage.list(cloudPath.bucket(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import com.google.common.collect.UnmodifiableIterator;

import java.io.File;
Expand Down Expand Up @@ -83,8 +86,50 @@ boolean seemsLikeADirectory() {
return path.seemsLikeADirectory();
}

boolean seemsLikeADirectoryAndUsePseudoDirectories() {
return path.seemsLikeADirectory() && fileSystem.config().usePseudoDirectories();
// True if this path may be a directory (and pseudo-directories are enabled)
// Checks:
// 1) does the path end in / ?
// 2) (optional, if storage is set) is there a file whose name starts with path+/ ?
boolean seemsLikeADirectoryAndUsePseudoDirectories(Storage storage) {
if (!fileSystem.config().usePseudoDirectories()) {
chingor13 marked this conversation as resolved.
Show resolved Hide resolved
return false;
}
if (path.seemsLikeADirectory()) {
return true;
}
// fancy case: the file name doesn't end in slash, but we've been asked to have pseudo dirs.
// Let's see if there are any files with this prefix.
if (storage == null) {
// we are in a context where we don't want to access the storage, so we conservatively
// say this isn't a directory.
return false;
}
// Using the provided path + "/" as a prefix, can we find one file? If so, the path
// is a directory.
String prefix = path.removeBeginningSeparator().toString();
if (!prefix.endsWith("/")) {
prefix += "/";
}
Page<Blob> list = storage.list(
this.bucket(),
Storage.BlobListOption.prefix(prefix),
// we only look at the first result, so no need for a bigger page.
Storage.BlobListOption.pageSize(1));
jean-philippe-martin marked this conversation as resolved.
Show resolved Hide resolved
for (Blob b : list.getValues()) {
// if this blob starts with our prefix and then a slash, then prefix is indeed a folder!
if (b.getBlobId() == null) {
continue;
}
String name = b.getBlobId().getName();
if (name == null) {
continue;
}
if (("/" + name).startsWith(this.path.toAbsolutePath() + "/")) {
return true;
}
}
// no match, so it's not a directory
return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
throws StorageException {
String delimiter = null;
String preprefix = "";
long maxResults = Long.MAX_VALUE;
for (Map.Entry<Option, ?> e : options.entrySet()) {
switch (e.getKey()) {
case PREFIX:
Expand All @@ -138,6 +139,9 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
case FIELDS:
// ignore and return all the fields
break;
case MAX_RESULTS:
maxResults = (Long) e.getValue();
break;
default:
throw new UnsupportedOperationException("Unknown option: " + e.getKey());
}
Expand All @@ -156,6 +160,16 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
values.add(so);
}
values.addAll(folders.values());

// truncate to max allowed length
if (values.size() > maxResults) {
List<StorageObject> newValues = new ArrayList<>();
for (int i=0; i < maxResults; i++) {
newValues.add(values.get(i));
}
values = newValues;
}

// null cursor to indicate there is no more data (empty string would cause us to be called again).
// The type cast seems to be necessary to help Java's typesystem remember that collections are iterable.
return Tuple.of(null, (Iterable<StorageObject>) values);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
Expand All @@ -54,6 +55,7 @@
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -397,6 +399,53 @@ public void testExists_trailingSlash_disablePseudoDirectories() throws Exception
}
}

@Test
public void testFakeDirectories() throws IOException {
try (FileSystem fs = forBucket("military")) {
List<Path> paths = new ArrayList<>();
paths.add(fs.getPath("dir/angel"));
paths.add(fs.getPath("dir/deepera"));
paths.add(fs.getPath("dir/deeperb"));
paths.add(fs.getPath("dir/deeper_"));
paths.add(fs.getPath("dir/deeper.sea/hasfish"));
paths.add(fs.getPath("dir/deeper/fish"));
for (Path path : paths) {
Files.createFile(path);
}

// ends with slash, must be a directory
assertThat(Files.isDirectory(fs.getPath("dir/"))).isTrue();
// files are not directories
assertThat(Files.exists(fs.getPath("dir/angel"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/angel"))).isFalse();
// directories are recognized even without the trailing "/"
assertThat(Files.isDirectory(fs.getPath("dir"))).isTrue();
// also works for absolute paths
assertThat(Files.isDirectory(fs.getPath("/dir"))).isTrue();
// non-existent files are not directories (but they don't make us crash)
assertThat(Files.isDirectory(fs.getPath("di"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dirs"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dir/deep"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dir/deeper/fi"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/fi"))).isFalse();
// also works for subdirectories
assertThat(Files.isDirectory(fs.getPath("dir/deeper/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper"))).isTrue();
// dot and .. folders are directories
assertThat(Files.isDirectory(fs.getPath("dir/deeper/."))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper/.."))).isTrue();
// dots in the name are fine
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper.seax"))).isFalse();
// the root folder is a directory
assertThat(Files.isDirectory(fs.getPath("/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath(""))).isTrue();
}
}

@Test
public void testDelete() throws Exception {
Files.write(Paths.get(URI.create("gs://love/fashion")), "(✿◕ ‿◕ )ノ".getBytes(UTF_8));
Expand Down
Loading