Skip to content

Commit

Permalink
Merge pull request #88 from teamclairvoyant/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
rahulbhatia023 authored Jul 31, 2023
2 parents 3403edf + c5d338e commit 905dad0
Show file tree
Hide file tree
Showing 129 changed files with 2,504 additions and 699 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
.bloop
.bsp
.metals
.venv
.vscode
.DS_Store

project
target

docs/build
.idea
.bsp
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ credentials += Credentials(
"<github_token>"
)

libraryDependencies += "com.clairvoyant.restonomer" %% "restonomer-core" % "2.1.0"
libraryDependencies += "com.clairvoyant.restonomer" %% "restonomer-core" % "2.2.0"
```

`<github_token>` is the Personal Access Token with the permission to read packages.
Expand Down
55 changes: 49 additions & 6 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ lazy val scalacOptions = Seq("-Xmax-inlines", "50")
// ----- VARIABLES ----- //

val organizationName = "com.clairvoyant.restonomer"
val releaseVersion = "2.1.0"
val releaseVersion = "2.2.0"

val zioConfigVersion = "4.0.0-RC14"
val sttpVersion = "3.8.13"
Expand All @@ -21,6 +21,10 @@ val odelayVersion = "0.4.0"
val s3MockVersion = "0.2.6"
val scalaXmlVersion = "2.1.0"
val scalaParserCombinatorsVersion = "2.2.0"
val gcsConnectorVersion = "hadoop3-2.2.2"
val monovoreDeclineVersion = "2.4.1"
val googleCloudStorageVersion = "2.24.0"
val testContainersScalaVersion = "0.40.17"

// ----- TOOL DEPENDENCIES ----- //

Expand All @@ -46,11 +50,15 @@ val scalaParserCombinatorsDependencies = Seq(

val sparkDependencies = Seq(
"org.apache.spark" %% "spark-core" % sparkVersion,
"org.apache.spark" %% "spark-sql" % sparkVersion,
"org.apache.spark" %% "spark-hadoop-cloud" % sparkVersion
"org.apache.spark" %% "spark-sql" % sparkVersion
)
.map(_ excludeAll ("org.scala-lang.modules", "scala-xml"))
.map(_.cross(CrossVersion.for3Use2_13))
.map(_ % "provided")

// Spark's hadoop-cloud module, declared as its own dependency group rather than
// as part of sparkDependencies.
// - hadoop-client-runtime is excluded from its transitive dependencies.
// - CrossVersion.for3Use2_13 makes a Scala 3 build resolve the Scala 2.13 artifact.
val sparkHadoopCloudDependencies = Seq("org.apache.spark" %% "spark-hadoop-cloud" % sparkVersion)
.map(_ exclude ("org.apache.hadoop", "hadoop-client-runtime"))
.map(_.cross(CrossVersion.for3Use2_13))

val catsDependencies = Seq("org.typelevel" %% "cats-core" % catsVersion)

Expand All @@ -62,22 +70,36 @@ val s3MockDependencies = Seq("io.findify" %% "s3mock" % s3MockVersion % "it,test
.map(_ excludeAll ("org.scala-lang.modules", "scala-collection-compat"))
.map(_.cross(CrossVersion.for3Use2_13))

// GCS connector from com.google.cloud.bigdataoss; single % because the artifact
// name carries no Scala binary-version suffix.
val gcsConnectorDependencies = Seq("com.google.cloud.bigdataoss" % "gcs-connector" % gcsConnectorVersion)

// decline (com.monovore); %% appends the Scala binary version to the artifact name.
val monovoreDeclineDependencies = Seq("com.monovore" %% "decline" % monovoreDeclineVersion)

// google-cloud-storage client library; single % (no Scala version suffix).
val googleCloudStorageDependencies = Seq("com.google.cloud" % "google-cloud-storage" % googleCloudStorageVersion)

// testcontainers-scala, scoped to the Test configuration only.
val testContainersScalaDependencies = Seq("com.dimafeng" %% "testcontainers-scala" % testContainersScalaVersion % Test)

// ----- MODULE DEPENDENCIES ----- //

val restonomerCoreDependencies =
zioConfigDependencies ++
scalaXmlDependencies ++
scalaParserCombinatorsDependencies ++
sparkDependencies ++
sttpDependencies ++
jwtDependencies ++
jsonPathDependencies ++
scalaTestDependencies.map(_ % "it,test") ++
wireMockDependencies ++
s3MockDependencies ++
odelayDependencies
odelayDependencies ++
gcsConnectorDependencies ++
monovoreDeclineDependencies ++
googleCloudStorageDependencies ++
testContainersScalaDependencies

val restonomerSparkUtilsDependencies =
sparkDependencies ++
sparkHadoopCloudDependencies ++
catsDependencies ++
scalaTestDependencies.map(_ % "test")

Expand All @@ -93,7 +115,8 @@ val restonomerCoreSettings =
commonSettings ++ Seq(
libraryDependencies ++= restonomerCoreDependencies,
Test / parallelExecution := false,
IntegrationTest / parallelExecution := false
IntegrationTest / parallelExecution := false,
assembly / mainClass := Some("com.clairvoyant.restonomer.core.app.RestonomerApp")
) ++ Defaults.itSettings

val restonomerSparkUtilsSettings =
Expand All @@ -108,18 +131,21 @@ lazy val restonomer = (project in file("."))
commonSettings ++ Seq(
publish / skip := true,
publishLocal / skip := true
)
),
addCommandAlias("run", "restonomer-core/run")
)
.aggregate(`restonomer-core`, `restonomer-spark-utils`)

lazy val `restonomer-core` = project
.configs(IntegrationTest)
.settings(restonomerCoreSettings)
.dependsOn(`restonomer-spark-utils` % "compile->compile;test->test;it->it;test->it")
.enablePlugins(AssemblyPlugin)

lazy val `restonomer-spark-utils` = project
.configs(IntegrationTest.extend(Test))
.settings(restonomerSparkUtilsSettings)
.enablePlugins(AssemblyPlugin)

// ----- PUBLISH TO GITHUB PACKAGES ----- //

Expand All @@ -131,3 +157,20 @@ ThisBuild / credentials += Credentials(
"teamclairvoyant",
System.getenv("GITHUB_TOKEN")
)

// ----- ASSEMBLY MERGE STRATEGY ----- //

// Build-wide rules for resolving files that appear in more than one jar when the
// assembly (fat jar) is built. Rules are matched on the last path segment (file name).
ThisBuild / assemblyMergeStrategy := {
// For these file names, keep only the last copy encountered (MergeStrategy.last).
case PathList(ps @ _*)
if (ps.last endsWith "io.netty.versions.properties")
|| (ps.last endsWith "reflection-config.json")
|| (ps.last endsWith "native-image.properties")
|| (ps.last endsWith "module-info.class")
|| (ps.last endsWith "UnusedStubClass.class") =>
MergeStrategy.last
// Every copy of public-suffix-list.txt is concatenated into a single file.
case PathList(ps @ _*) if ps.last endsWith "public-suffix-list.txt" =>
MergeStrategy.concat
// Anything else is delegated to the previously configured strategy.
case x =>
val oldStrategy = (ThisBuild / assemblyMergeStrategy).value
oldStrategy(x)
}
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project = 'restonomer'
copyright = '2023, Clairvoyant'
author = 'Clairvoyant'
release = '2.1.0'
release = '2.2.0'

extensions = ['myst_parser']

Expand Down
4 changes: 3 additions & 1 deletion docs/source/restonomer_getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ credentials += Credentials(
"<github_token>"
)

libraryDependencies += "com.clairvoyant.restonomer" %% "restonomer-core" % "2.1.0"
libraryDependencies += "com.clairvoyant.restonomer" %% "restonomer-core" % "2.2.0"
```

`<github_token>` is the Personal Access Token with the permission to read packages.
Expand All @@ -31,6 +31,8 @@ libraryDependencies += "com.clairvoyant.restonomer" %% "restonomer-core" % "2.1.
User can create the restonomer context instance by passing the restonomer context directory path to the constructor
of RestonomerContext class.

Currently, the user can provide either a local file system path or a GCS path for the restonomer context directory.

```scala
private val restonomerContextDirectoryPath = "<restonomer_context_directory_path>"
private val restonomerContext = RestonomerContext(restonomerContextDirectoryPath)
Expand Down
28 changes: 28 additions & 0 deletions docs/source/restonomer_persistence.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,31 @@ This can be done in either of below 2 ways:
"spark.hadoop.fs.s3a.secret.key" = "<AWS_SECRET_KEY>"
}
```

## GCSBucket

When the user wants to use the GCS bucket service from GCP for persistence, they can use the GCSBucket persistence feature of restonomer.

The GCSBucket persistence needs below arguments from the user:

| Input Arguments | Mandatory | Default Value | Description |
|:---------------------------|:---------:|:-------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------|
| service-account-cred-file | Yes | - | The filepath of the GCP service account credentials |
| bucket-name | Yes | - | The name of the GCS bucket where the dataframe files need to be stored |
| file-format | Yes | - | The format (json/csv/parquet) of the files to be persisted for the response dataframe |
| file-path | Yes | - | The path of the directory where output files will be persisted |
| save-mode | No | ErrorIfExists | This is used to specify the expected behavior of saving a DataFrame to a data source.<br/> Expected values are (append, overwrite, errorifexists, ignore) |

User can configure the GCSBucket persistence in the below manner:

```hocon
persistence = {
type = "GCSBucket"
service-account-cred-file = "/user/secret/creds/gcs-cred.json"
bucket-name = "test-bucket"
file-format = "JSON"
file-path = "test-output-dir"
}
```

Now, in order to make this work, user needs to provide a correct service account credentials file.
42 changes: 42 additions & 0 deletions docs/source/restonomer_request_body.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,46 @@ data = {
}
}
}
```

## JSONDataBody

In its simplest form, the request’s body can be set as a json string.

A json body can be set on a request in checkpoint file in the below manner:

```hocon
body = {
type = "JSONDataBody"
data = """{"k1": "v1", "k2": "v2"}"""
}
```

Below is the full example of checkpoint file:

```hocon
name = "checkpoint_text_data_request_body"
data = {
data-request = {
url = "http://localhost:8080/text-data-request-body"
body = {
type = "JSONDataBody"
data = """{"k1": "v1", "k2": "v2"}"""
}
}
data-response = {
body = {
type = "JSON"
}
persistence = {
type = "FileSystem"
file-format = "JSON"
file-path = "/tmp/body"
}
}
}
```
Loading

0 comments on commit 905dad0

Please sign in to comment.