Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

external rules loaded as json with UT #40

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
*.iml
#local spark context data from unit tests
spark-warehouse/
#scala worksheets
*.sc

#Build directory for maven/sbt
target/
Expand All @@ -11,3 +13,4 @@ project/target/
/target/
/project/build.properties
/src/main/scala/com/databricks/labs/validation/LocalTest.scala
.bsp/sbt.json
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,18 @@ val specializedRules = Array(
)
RuleSet(df, by = "store").add(specializedRules)
```

## List of Rules as JSON
An array of rules can be initialised from an external file containing valid JSON.
Pass the JSON as a String, as follows:
```scala
val jsonParserInstance = new JsonRuleParser()
val rulesArray = jsonParserInstance.parseRules(jsonString)
```
The resulting Array of Rules can then be used in your RuleSet.
Note: this interface does not currently support MinMaxRule initialisation; support will
be added in the next iteration.

Common Real World Example
```scala
case class GlobalRules(regionID: Int, bu: String, subOrg: String, rules: Array[Rule]*)
Expand Down
9 changes: 9 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ libraryDependencies += "org.apache.spark" %% "spark-core" % "3.0.1" % Provided
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.0.1" % Provided
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.6" % Test


// Circe JSON modules used by the rule parser; every module is resolved at the same version.
val circeVersion = "0.14.1"

libraryDependencies ++= Seq("circe-core", "circe-generic", "circe-parser")
  .map(circeModule => "io.circe" %% circeModule % circeVersion)

run in Compile := Defaults.runTask(fullClasspath in Compile, mainClass in(Compile, run), runner in(Compile, run)).evaluated
runMain in Compile := Defaults.runMainTask(fullClasspath in Compile, runner in(Compile, run)).evaluated

Expand Down
197 changes: 197 additions & 0 deletions src/main/scala/com/databricks/labs/validation/RuleParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package com.databricks.labs.validation

import io.circe.{Decoder, Json, parser}
import com.databricks.labs.validation.utils.Structures.Bounds
import org.apache.spark.sql.functions.col

import scala.io.Source




/**
* Common contract for rule parsers, so that additional input formats can be supported
* by adding further implementations.
*/
sealed trait RuleParser{

/** Identifier, supplied by each implementation, naming the format it supports. */
val parserType:String

/**
* Reads the raw rule definitions.
*
* @param filePath the path of the file containing the rules
* @return the content of the file as a String
*/
def readRules(filePath:String):String

/**
* Converts rule definitions into individual rules.
*
* @param rules the rule definitions as a String
* @return an Array with one [[Rule]] per rule specified in the input
*/
def parseRules(rules:String):Array[Rule]
}




/**
 * [[RuleParser]] implementation for rules supplied as a JSON array of rule objects.
 *
 * Every rule object needs "ruleName" and "column"; the remaining keys ("Bounds",
 * "validExpr", "validNumerics", "validString", "ignoreCase", "invertMatch") select
 * which `Rule` constructor is used. See `ruleDecoder` for the order in which the
 * alternatives are tried.
 */
class JsonRuleParser extends RuleParser {

  override final val parserType ="jsonParser"

  /**
   * Decodes a JSON array of rule objects into rules.
   *
   * @param rules the JSON document as a String; when `null`, the default resource
   *              returned by `readRules()` is decoded instead
   * @return one [[Rule]] per element of the JSON array
   * @throws NoSuchElementException if the text is not valid JSON or an element cannot
   *                                be decoded; the circe failure is attached as cause
   */
  def parseRules(rules: String): Array[Rule] = {
    // The caller's JSON takes precedence; the bundled resource is only a fallback.
    val jsonText = if (rules == null) readRules() else rules
    parser.decode[Array[Rule]](jsonText) match {
      case Right(parsedRules) => parsedRules
      case Left(failure) =>
        // Same exception type as the former `.right.get`, but carrying the real reason.
        val error = new NoSuchElementException(
          s"Unable to parse rules from JSON: ${failure.getMessage}")
        error.initCause(failure)
        throw error
    }
  }

  /**
   * Reads a classpath resource fully into a String.
   *
   * @param filePath name of the classpath resource, "rules.json" by default
   * @return the complete content of the resource
   */
  def readRules(filePath: String = "rules.json"): String = {
    val ruleSource = Source.fromResource(filePath)
    // Always release the underlying stream, even when reading fails.
    try ruleSource.mkString
    finally ruleSource.close()
  }

  /*
   * Decoders needed by circe to turn individual JSON objects into the supported
   * Rule variants. Each one reads the keys of exactly one Rule constructor.
   */

  /** "lower"/"upper" are mandatory; both inclusive flags default to false when absent. */
  private val _boundsDecoder: Decoder[Bounds] = {
    boundCursor =>
      for {
        lower <- boundCursor.get[Double]("lower")
        upper <- boundCursor.get[Double]("upper")
        lowerInclusive <- boundCursor.getOrElse[Boolean]("lowerInclusive")(false)
        upperInclusive <- boundCursor.getOrElse[Boolean]("upperInclusive")(false)
      } yield Bounds(lower, upper, lowerInclusive, upperInclusive)
  }

  /** Rule checked against "Bounds". */
  private val _ruleDecoder1: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        bounds <- ruleCursor.get[Bounds]("Bounds")
      } yield Rule(ruleName, column, bounds)
  }

  /** Rule made of a name and a column only. */
  private val _ruleDecoder2: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
      } yield Rule(ruleName, column)
  }

  /**
   * Rule with a "validExpr".
   * NOTE(review): the value is wrapped with `col`, i.e. handled as a column name rather
   * than parsed as a SQL expression - confirm this is intended.
   */
  private val _ruleDecoder3: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validExpr <- ruleCursor.get[String]("validExpr").map(x => col(x))
      } yield Rule(ruleName, column, validExpr)
  }

  /** "validNumerics" as doubles with a mandatory "invertMatch". */
  private val _ruleDecoder4: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Double]]("validNumerics")
        invertMatch <- ruleCursor.get[Boolean]("invertMatch")
      } yield Rule(ruleName, column, validNumerics, invertMatch)
  }

  /** "validNumerics" as doubles. */
  private val _ruleDecoder5a: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Double]]("validNumerics")
      } yield Rule(ruleName, column, validNumerics)
  }

  /** "validNumerics" as longs. */
  private val _ruleDecoder5b: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Long]]("validNumerics")
      } yield Rule(ruleName, column, validNumerics)
  }

  /** "validNumerics" as ints. */
  private val _ruleDecoder5c: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Int]]("validNumerics")
      } yield Rule(ruleName, column, validNumerics)
  }

  /** "validNumerics" as doubles; "invertMatch" defaults to false when absent. */
  private val _ruleDecoder5d1: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Double]]("validNumerics")
        invertMatch <- ruleCursor.getOrElse[Boolean]("invertMatch")(false)
      } yield Rule(ruleName, column, validNumerics, invertMatch)
  }

  /** "validNumerics" as ints; "invertMatch" defaults to false when absent. */
  private val _ruleDecoder5d2: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validNumerics <- ruleCursor.get[Array[Int]]("validNumerics")
        invertMatch <- ruleCursor.getOrElse[Boolean]("invertMatch")(false)
      } yield Rule(ruleName, column, validNumerics, invertMatch)
  }

  /** "validString" list; "ignoreCase" and "invertMatch" default to false when absent. */
  private val _ruleDecoder6: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validString <- ruleCursor.get[Array[String]]("validString")
        ignoreCase <- ruleCursor.getOrElse[Boolean]("ignoreCase")(false)
        invertMatch <- ruleCursor.getOrElse[Boolean]("invertMatch")(false)
      } yield Rule(ruleName, column, validString, ignoreCase, invertMatch)
  }

  /** "validString" list only. */
  private val _ruleDecoder7: Decoder[Rule] = {
    ruleCursor =>
      for {
        ruleName <- ruleCursor.get[String]("ruleName")
        column <- ruleCursor.get[String]("column").map(x => col(x))
        validString <- ruleCursor.get[Array[String]]("validString")
      } yield Rule(ruleName, column, validString)
  }

  implicit val boundsDecoder: Decoder[Bounds] = _boundsDecoder

  /**
   * Decoder for a single rule object. The alternatives are tried from top to bottom and
   * the first one that succeeds wins, so the name/column-only decoder has to stay last.
   * NOTE(review): an earlier alternative that accepts the same keys appears to shadow
   * later ones (e.g. `_ruleDecoder5a` before 5b/5c/5d*, `_ruleDecoder6` before 7) -
   * confirm whether the later alternatives are still needed.
   */
  implicit val ruleDecoder: Decoder[Rule] = {
    _ruleDecoder3 or
      _ruleDecoder4 or
      _ruleDecoder5a or
      _ruleDecoder5b or
      _ruleDecoder5c or
      _ruleDecoder5d1 or
      _ruleDecoder5d2 or
      _ruleDecoder6 or
      _ruleDecoder7 or
      _ruleDecoder1 or
      _ruleDecoder2
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ object Structures {
val lowerLogic = if (lowerInclusive) c >= lower else c > lower
val upperLogic = if (upperInclusive) c <= upper else c < upper
lowerLogic && upperLogic
}
}
}

/**
 * Definition of a min/max rule: a rule name, the column it applies to, the bounds to
 * check against and zero or more `by` columns.
 * NOTE(review): `by` presumably lists grouping columns - confirm against its usages.
 */
case class MinMaxRuleDef(ruleName: String, column: Column, bounds: Bounds, by: Column*)
Expand Down
53 changes: 53 additions & 0 deletions src/test/resources/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
[
{
"ruleName":"ImplicitCoolingExpr",
"column":"booleanisithot"
},
{
"ruleName":"HeatingRateIntRulewith2Bounds",
"column":"heatingrate-coolingrate",
"Bounds":{
"lower":"0.01",
"upper":"1000.0",
"lowerInclusive" : true,
"upperInclusive" : true
}
},
{
"ruleName":"HeatingRateIntRulewith1bounds",
"column":"heatingrate-coolingrate",
"Bounds":{
"lower":"0.01",
"upper":"1000.0",
"lowerInclusive" : true
}
},
{
"ruleName":"HeatingRateIntRulewithCategoryLookup",
"column":"heatingrate-coolingrate",
"validString":["Good,Bad"]
},
{
"ruleName":"HeatingRateIntRulewithCategoryLookupIgnorecase",
"column":"heatingrate-coolingrate",
"validString":["Good,Bad"],
"ignoreCase":true
},
{
"ruleName":"HeatingRateIntRulewithCategoryLookupInvertmatch",
"column":"heatingrate-coolingrate",
"validString":["Good,Bad"],
"invertMatch":true
},
{
"ruleName":"HeatingRateIntRulewithCategoryLookupInt",
"column":"heatingrate-coolingrate",
"validNumerics":[1,10,100,1000]
},
{
"ruleName":"HeatingRateIntRulewithCategoryLookupIntInvertTrue",
"column":"heatingrate-coolingrate",
"validNumerics":[1,10,100,1000],
"invertMatch":true
}
]
Loading