Skip to content

Commit

Permalink
Fixes bug in DeltaTableUtils.findDeltaTableRoot
Browse files Browse the repository at this point in the history
Previously, DeltaTableUtils.findDeltaTableRoot would throw an exception if it is passed a base path that is converted to a Uri with an empty path component (e.g. `s3://my-bucket`). This PR catches such cases and prepends a slash when combining a base path with a _delta_log subdirectory. It also adds a new test suite for DeltaTableUtils.

GitOrigin-RevId: ebf74770dc3b0cdfddeadb97114d38cb00802995
  • Loading branch information
LukasRupprecht authored and allisonport-db committed Mar 22, 2023
1 parent c2baa30 commit 2814897
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
23 changes: 22 additions & 1 deletion core/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ object DeltaTableUtils extends PredicateHelper
var currentPath = path
while (currentPath != null && currentPath.getName != "_delta_log" &&
currentPath.getName != "_samples") {
val deltaLogPath = new Path(currentPath, "_delta_log")
val deltaLogPath = safeConcatPaths(currentPath, "_delta_log")
if (Try(fs.exists(deltaLogPath)).getOrElse(false)) {
return Option(currentPath)
}
Expand Down Expand Up @@ -397,4 +397,25 @@ object DeltaTableUtils extends PredicateHelper
def parseColToTransform(col: String): IdentityTransform = {
IdentityTransform(FieldReference(Seq(col)))
}

/**
* Uses org.apache.hadoop.fs.Path(Path, String) to concatenate a base path
* and a relative child path and safely handles the case where the base path represents
* a Uri with an empty path component (e.g. s3://my-bucket, where my-bucket would be
* interpreted as the Uri authority).
*
* In that case, the child path is converted to an absolute path at the root, i.e. /childPath.
* This prevents a "URISyntaxException: Relative path in absolute URI", which would be thrown
* by org.apache.hadoop.fs.Path(Path, String) because it tries to convert the base path to a Uri
* and then resolve the child on top of it. This is invalid for an empty base path and a
* relative child path according to the Uri specification, which states that if an authority
* is defined, the path component needs to be either empty or start with a '/'.
*/
def safeConcatPaths(basePath: Path, relativeChildPath: String): Path = {
if (basePath.toUri.getPath.isEmpty) {
new Path(basePath, s"/$relativeChildPath")
} else {
new Path(basePath, relativeChildPath)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.delta

// scalastyle:off import.ordering.noEmptyLine
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession

class DeltaTableUtilsSuite extends SharedSparkSession {

test("findDeltaTableRoot correctly combines paths") {
withTempDir { dir =>
sql(s"CREATE TABLE myTable (id INT) USING DELTA LOCATION '${dir.getAbsolutePath}'")
val path = new Path(s"file://${dir.getAbsolutePath}")
assert(DeltaTableUtils.findDeltaTableRoot(spark, path).contains(path))
}
}

test("safeConcatPaths") {
val basePath = new Path("s3://my-bucket/subfolder")
val basePathEmpty = new Path("s3://my-bucket")
assert(DeltaTableUtils.safeConcatPaths(basePath, "_delta_log") ==
new Path("s3://my-bucket/subfolder/_delta_log"))
assert(DeltaTableUtils.safeConcatPaths(basePathEmpty, "_delta_log") ==
new Path("s3://my-bucket/_delta_log"))
}
}

0 comments on commit 2814897

Please sign in to comment.