From 33282e21d2ca2f108676fd61be5883894ab1445c Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 31 Oct 2023 18:06:21 +0800
Subject: [PATCH] [KYUUBI #5579][AUTHZ] Support LogicalRelation without a
 CatalogTable but with a HadoopFsRelation

---
 ...uubi.plugin.spark.authz.serde.URIExtractor |  1 +
 .../src/main/resources/scan_command_spec.json | 28 +++++++++++++------
 .../spark/authz/PrivilegesBuilder.scala       |  5 ++++
 .../spark/authz/serde/CommandSpec.scala       | 15 +++++++++-
 .../spark/authz/serde/pathExtractors.scala    |  8 ++++++
 .../kyuubi/plugin/spark/authz/gen/Scans.scala |  3 +-
 .../ranger/RangerSparkExtensionSuite.scala    | 28 +++++++++++++++++++
 7 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor
index 0b77fa26eea..5e099a76f0f 100644
--- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor
+++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor
@@ -15,4 +15,5 @@
 # limitations under the License.
 #

+org.apache.kyuubi.plugin.spark.authz.serde.HadoopFsRelationFileIndexURIExtractor
 org.apache.kyuubi.plugin.spark.authz.serde.StringURIExtractor
diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json
index 40a0d81c24f..c933bd47ed4 100644
--- a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json
+++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json
@@ -5,7 +5,8 @@
     "fieldExtractor" : "CatalogTableTableExtractor",
     "catalogDesc" : null
   } ],
-  "functionDescs" : [ ]
+  "functionDescs" : [ ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.catalyst.catalog.HiveTableRelation",
   "scanDescs" : [ {
@@ -13,7 +14,8 @@
     "fieldExtractor" : "CatalogTableTableExtractor",
     "catalogDesc" : null
   } ],
-  "functionDescs" : [ ]
+  "functionDescs" : [ ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.execution.datasources.LogicalRelation",
   "scanDescs" : [ {
@@ -21,7 +23,12 @@
     "fieldExtractor" : "CatalogTableOptionTableExtractor",
     "catalogDesc" : null
   } ],
-  "functionDescs" : [ ]
+  "functionDescs" : [ ],
+  "uriDescs" : [ {
+    "fieldName" : "relation",
+    "fieldExtractor" : "HadoopFsRelationFileIndexURIExtractor",
+    "isInput" : false
+  } ]
 }, {
   "classname" : "org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation",
   "scanDescs" : [ {
@@ -29,7 +36,8 @@
     "fieldExtractor" : "DataSourceV2RelationTableExtractor",
     "catalogDesc" : null
   } ],
-  "functionDescs" : [ ]
+  "functionDescs" : [ ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.hive.HiveGenericUDF",
   "scanDescs" : [ ],
@@ -43,7 +51,8 @@
       "skipTypes" : [ "TEMP", "SYSTEM" ]
     },
     "isInput" : true
-  } ]
+  } ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.hive.HiveGenericUDTF",
   "scanDescs" : [ ],
@@ -57,7 +66,8 @@
       "skipTypes" : [ "TEMP", "SYSTEM" ]
     },
     "isInput" : true
-  } ]
+  } ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.hive.HiveSimpleUDF",
   "scanDescs" : [ ],
@@ -71,7 +81,8 @@
       "skipTypes" : [ "TEMP", "SYSTEM" ]
     },
     "isInput" : true
-  } ]
+  } ],
+  "uriDescs" : [ ]
 }, {
   "classname" : "org.apache.spark.sql.hive.HiveUDAFFunction",
   "scanDescs" : [ ],
@@ -85,5 +96,6 @@
       "skipTypes" : [ "TEMP", "SYSTEM" ]
     },
     "isInput" : true
-  } ]
+  } ],
+  "uriDescs" : [ ]
 } ]
\ No newline at end of file
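The new `uriDescs` field parallels `scanDescs` and `functionDescs`: for `LogicalRelation` it tells the serde layer to pull the `relation` field out of the plan node and run it through `HadoopFsRelationFileIndexURIExtractor`, which the `META-INF/services` entry above makes discoverable at runtime. As a minimal sketch of that lookup mechanism, assuming extractors are keyed by simple class name (the `UriExtractorRegistry` helper below is hypothetical, not part of this patch):

```scala
import java.util.ServiceLoader

// Hypothetical helper: resolve a "fieldExtractor" name from scan_command_spec.json
// to the URIExtractor instance registered under META-INF/services. The plugin's
// real serde layer may build this map differently.
object UriExtractorRegistry {
  private lazy val byName: Map[String, URIExtractor] = {
    val loaded = ServiceLoader.load(classOf[URIExtractor]).iterator()
    var m = Map.empty[String, URIExtractor]
    while (loaded.hasNext) {
      val extractor = loaded.next()
      m += (extractor.getClass.getSimpleName -> extractor)
    }
    m
  }

  // e.g. resolve("HadoopFsRelationFileIndexURIExtractor")
  def resolve(fieldExtractor: String): Option[URIExtractor] = byName.get(fieldExtractor)
}
```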
"scanDescs" : [ ], @@ -85,5 +96,6 @@ "skipTypes" : [ "TEMP", "SYSTEM" ] }, "isInput" : true - } ] + } ], + "uriDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala index 8334992800d..625ef383163 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.OperationType.OperationType @@ -112,6 +113,10 @@ object PrivilegesBuilder { val cols = conditionList ++ aggCols buildQuery(a.child, privilegeObjects, projectionList, cols, spark) + case logicalRelation @ LogicalRelation(_: HadoopFsRelation, _, None, _) => + getScanSpec(logicalRelation).uris(logicalRelation) + .foreach(privilegeObjects += PrivilegeObject(_)) + case scan if isKnownScan(scan) && scan.resolved => getScanSpec(scan).tables(scan, spark).foreach(mergeProjection(_, scan)) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala index 7b306551cc3..14f3719b8a8 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala @@ -101,7 +101,8 @@ case class TableCommandSpec( case class ScanSpec( classname: String, scanDescs: Seq[ScanDesc], - functionDescs: Seq[FunctionDesc] = Seq.empty) extends CommandSpec { + functionDescs: Seq[FunctionDesc] = Seq.empty, + uriDescs: Seq[UriDesc] = Seq.empty) extends CommandSpec { override def opType: String = OperationType.QUERY.toString def tables: (LogicalPlan, SparkSession) => Seq[Table] = (plan, spark) => { scanDescs.flatMap { td => @@ -115,6 +116,18 @@ case class ScanSpec( } } + def uris: LogicalPlan => Seq[Uri] = plan => { + uriDescs.flatMap { ud => + try { + ud.extract(plan) + } catch { + case e: Exception => + LOG.debug(ud.error(plan, e)) + None + } + } + } + def functions: (Expression) => Seq[Function] = (expr) => { functionDescs.flatMap { fd => try { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/pathExtractors.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/pathExtractors.scala index 81fa8411b0d..1086074a8c9 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/pathExtractors.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/pathExtractors.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.plugin.spark.authz.serde +import org.apache.spark.sql.execution.datasources.HadoopFsRelation + trait URIExtractor extends (AnyRef => 
diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala
index 7771a2dd227..bb5906be827 100644
--- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala
+++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala
@@ -37,7 +37,8 @@ object Scans extends CommandSpecs[ScanSpec] {
       ScanDesc(
         "catalogTable",
         classOf[CatalogTableOptionTableExtractor])
-    ScanSpec(r, Seq(tableDesc))
+    val uriDesc = UriDesc("relation", classOf[HadoopFsRelationFileIndexURIExtractor])
+    ScanSpec(r, Seq(tableDesc), uriDescs = Seq(uriDesc))
   }

   val DataSourceV2Relation = {
diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
index d3f19068726..8bb1d26f156 100644
--- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
+++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
@@ -17,6 +17,7 @@

 package org.apache.kyuubi.plugin.spark.authz.ranger

+import scala.reflect.io.File
 import scala.util.Try

 import org.apache.hadoop.security.UserGroupInformation
@@ -1032,4 +1033,31 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
       }
     }
   }
+
+  test("HadoopFsRelation") {
+    val db1 = defaultDb
+    val table1 = "table1"
+    val tableDirectory = getClass.getResource("/").getPath + "table_directory"
+    val directory = File(tableDirectory).createDirectory()
+    withSingleCallEnabled {
+      withCleanTmpResources(Seq((s"$db1.$table1", "table"))) {
+        doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)"))
+        doAs(
+          admin,
+          sql(
+            s"""
+               |INSERT OVERWRITE DIRECTORY '${directory.path}'
+               |USING parquet
+               |SELECT * FROM $db1.$table1""".stripMargin))
+
+        interceptContains[AccessControlException](doAs(
+          someone,
+          sql(
+            s"""
+               |SELECT * FROM parquet.`${directory.path}`""".stripMargin).explain(true)))(
+          s"does not have [select] privilege on " +
+            s"[[file:${directory.path}, file:${directory.path}/]]")
+      }
+    }
+  }
 }
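The new test captures the user-visible effect: after `INSERT OVERWRITE DIRECTORY ... USING parquet`, reading the directory back via ``parquet.`<path>` `` produces a `LogicalRelation` with a `HadoopFsRelation` but no `CatalogTable`, so authorization is now checked against the file URI instead of being skipped. An illustrative usage sketch (the path and `spark` session are placeholders, not from the patch):

```scala
// Without a Ranger policy granting [select] on the directory URI, the query is
// rejected during analysis; the message matches the assertion in the test above:
//   ... does not have [select] privilege on
//   [[file:/tmp/table_directory, file:/tmp/table_directory/]]
spark.sql("SELECT * FROM parquet.`/tmp/table_directory`").show()
```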