Add sql-runner module and deployment example #2

Merged (3 commits) on Aug 5, 2024
8 changes: 8 additions & 0 deletions .gitignore
@@ -10,6 +10,12 @@
# Mobile Tools for Java (J2ME)
.mtj.tmp/

### IntelliJ IDEA ###
.idea/*
*.iws
*.iml
*.ipr

# Package Files #
*.jar
*.war
@@ -22,3 +28,5 @@
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
replay_pid*
target
dependency-reduced-pom.xml
17 changes: 16 additions & 1 deletion README.md
@@ -1 +1,16 @@
# flink-sql
# Flink SQL Runner

An application to execute Flink SQL jobs.

## Building and running Flink SQL Runner

1. Build the application:
```
mvn package
```
2. Build the container image:
```
minikube image build flink-sql-runner -t flink-sql-runner:latest
```

See the /examples directory for how to run a FlinkDeployment using the SQL runner.
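Applying the example requires the Flink Kubernetes Operator, which provides the `FlinkDeployment` custom resource; with the operator installed, the example can be deployed with `kubectl apply -f examples/FlinkDeployment.yaml`.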
24 changes: 24 additions & 0 deletions examples/FlinkDeployment.yaml
@@ -0,0 +1,24 @@
apiVersion: flink.apache.org/v1beta1
kind: FlinkDeployment
metadata:
  name: flink-deployment-example
spec:
  image: flink-sql-runner:latest
  imagePullPolicy: Never
  flinkVersion: v1_19
  flinkConfiguration:
    taskmanager.numberOfTaskSlots: "1"
  serviceAccount: flink
  jobManager:
    resource:
      memory: "2048m"
      cpu: 1
  taskManager:
    resource:
      memory: "2048m"
      cpu: 1
  job:
    jarURI: local:///opt/flink/usrlib/flink-sql-runner.jar
    args: ["<SQL_STATEMENTS>"]
    parallelism: 1
    upgradeMode: stateless
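Note that `<SQL_STATEMENTS>` is a placeholder for the SQL to run: the runner accepts exactly one program argument, so all statements must be passed as a single semicolon-separated string.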
4 changes: 4 additions & 0 deletions flink-sql-runner/Dockerfile
@@ -0,0 +1,4 @@
FROM flink:1.19.1

RUN mkdir /opt/flink/usrlib
ADD target/flink-sql-runner-*.jar /opt/flink/usrlib/flink-sql-runner.jar
109 changes: 109 additions & 0 deletions flink-sql-runner/pom.xml
@@ -0,0 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.github.streamshub</groupId>
        <artifactId>flink-sql</artifactId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>

    <artifactId>flink-sql-runner</artifactId>

    <properties>
        <maven.compiler.release>11</maven.compiler.release>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>io.fabric8</groupId>
            <artifactId>kubernetes-client</artifactId>
            <version>${fabric8.kubernetes-client.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-avro-confluent-registry</artifactId>
            <version>${flink.avro.confluent.registry.version}</version>
        </dependency>

        <!-- Connectors -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>${flink.kafka.connector.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.6.0</version>
                <executions>
                    <!-- Run shade goal on package phase -->
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:flink-shaded-force-shading</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <!-- Do not copy the signatures in the META-INF folder.
                                         Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.github.streamshub.flink.SqlRunner</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
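One note on the shade configuration: the `ServicesResourceTransformer` merges the `META-INF/services` entries of the bundled dependencies. Flink discovers connector and format factories through these service files, so without the transformer the shaded jar could lose the Kafka connector's factory registrations.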
46 changes: 46 additions & 0 deletions flink-sql-runner/src/main/java/com/github/streamshub/flink/KubernetesSecretReplacer.java
@@ -0,0 +1,46 @@
package com.github.streamshub.flink;

import io.fabric8.kubernetes.api.model.Secret;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientBuilder;
import io.fabric8.kubernetes.client.KubernetesClientException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class KubernetesSecretReplacer {
    // Expected pattern for a secret is {{secret:<namespace>/<name>/<key>}}
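    // e.g. "{{secret:flink/kafka-credentials/password}}" (hypothetical names) captures
    // namespace "flink", secret name "kafka-credentials" and key "password".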
    private static final Pattern SECRET_PATTERN = Pattern.compile("\\{\\{secret:([^/]+)/([^}]+)/([^}]+)}}");
Contributor: a comment above showing a couple of real examples might help readers understand what the regex is trying to pull apart.

    static String interpolateSecrets(String input) {
        Matcher matcher = SECRET_PATTERN.matcher(input);
        StringBuffer result = new StringBuffer();

        while (matcher.find()) {
            String namespace = matcher.group(1);
            String secretName = matcher.group(2);
            String secretKey = matcher.group(3);
            String secretValue = getSecretValue(namespace, secretName, secretKey);
Contributor: I think we should actually extract the resolver as an interface and inject that into this class (happy to do/see that work done separately) but it would make testing this a lot easier.

Contributor Author: I have noted this in the issue for adding unit tests.

            // Quote the replacement so '$' or '\' in a secret value is treated literally
            matcher.appendReplacement(result, Matcher.quoteReplacement(secretValue != null ? secretValue : ""));
        }
        matcher.appendTail(result);

        return result.toString();
    }

    private static String getSecretValue(String namespace, String secretName, String secretKey) {
        try (KubernetesClient client = new KubernetesClientBuilder().build()) {
            Secret secret = client.secrets().inNamespace(namespace).withName(secretName).get();
            if (secret == null) {
                throw new RuntimeException("Secret " + secretName + " does not exist");
            }
            if (secret.getData() != null && secret.getData().containsKey(secretKey)) {
                return new String(Base64.getDecoder().decode(secret.getData().get(secretKey)), StandardCharsets.UTF_8);
            } else {
                throw new RuntimeException("Could not read data with key " + secretKey + " from secret " + secretName);
            }
        } catch (KubernetesClientException e) {
            throw new RuntimeException(e);
        }
    }
}
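For illustration, a minimal usage sketch of the class above; the namespace, Secret name, and data key are hypothetical, and it assumes in-cluster (or kubeconfig) credentials that allow reading the Secret:

```java
package com.github.streamshub.flink;

// Minimal sketch showing how a placeholder inside a SQL statement is resolved.
public class SecretInterpolationExample {
    public static void main(String[] args) {
        // A statement referencing a hypothetical Secret "kafka-credentials" in
        // namespace "flink" that holds a data key "password".
        String sql = "CREATE TABLE Sink (msg STRING) WITH ("
                + "'connector' = 'kafka', "
                + "'properties.sasl.jaas.config' = "
                + "'org.apache.kafka.common.security.plain.PlainLoginModule required "
                + "username=\"user\" password=\"{{secret:flink/kafka-credentials/password}}\";')";

        // The placeholder is replaced with the base64-decoded value of "password"
        // fetched from the Kubernetes API.
        String resolved = KubernetesSecretReplacer.interpolateSecrets(sql);
        System.out.println(resolved.length()); // length only, to avoid echoing the secret
    }
}
```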
117 changes: 117 additions & 0 deletions flink-sql-runner/src/main/java/com/github/streamshub/flink/SqlRunner.java
@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.github.streamshub.flink;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Parses and executes SQL statements. */
public class SqlRunner {

    private static final Logger LOG = LoggerFactory.getLogger(SqlRunner.class);

    private static final String STATEMENT_DELIMITER = ";"; // a statement should end with `;`
    private static final String LINE_DELIMITER = "\n";

    private static final Pattern SET_STATEMENT_PATTERN =
            Pattern.compile("SET\\s+'(\\S+)'\\s+=\\s+'(.*)';", Pattern.CASE_INSENSITIVE);

    private static final Pattern STATEMENT_SET_PATTERN =
            Pattern.compile("(EXECUTE STATEMENT SET BEGIN.*?END;)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            throw new Exception("Exactly 1 argument is expected.");
        }

        var statements = parseStatements(args[0]);

        EnvironmentSettings settings = EnvironmentSettings
                .newInstance()
                .inStreamingMode()
                .build();
        var tableEnv = TableEnvironment.create(settings);
        LOG.debug("TableEnvironment config: {}", tableEnv.getConfig().toMap());

        for (String statement : statements) {
            var processedStatement = KubernetesSecretReplacer.interpolateSecrets(statement);
            Matcher setMatcher = SET_STATEMENT_PATTERN.matcher(statement.trim());

            if (setMatcher.matches()) {
                // Handle SET statements
                String key = setMatcher.group(1);
                String value = setMatcher.group(2);
                LOG.debug("Setting configuration:\n{}={}", key, value);
                tableEnv.getConfig().getConfiguration().setString(key, value);
            } else {
                LOG.info("Executing:\n{}", statement);
                tableEnv.executeSql(processedStatement);
            }
        }
    }

    private static List<String> parseStatements(String rawStatements) {
        var formatted = formatSqlStatements(rawStatements.trim());

        var statements = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        Matcher matcher = STATEMENT_SET_PATTERN.matcher(formatted);

        String statementSet = "";
        String otherStatements = formatted;

        if (matcher.find()) {
            statementSet = matcher.group(1);
            otherStatements = formatted.replace(statementSet, "").trim();
        }

        for (char c : otherStatements.toCharArray()) {
            if (c == STATEMENT_DELIMITER.charAt(0)) {
                current.append(c);
                statements.add(current.toString().trim());
                current = new StringBuilder();
            } else {
                current.append(c);
            }
        }

        if (statementSet.length() > 0) {
            statements.add(statementSet);
        }

        return statements;
    }

    private static String formatSqlStatements(String content) {
        StringBuilder formatted = new StringBuilder();
        formatted.append(content);
        if (!content.endsWith(STATEMENT_DELIMITER)) {
            formatted.append(STATEMENT_DELIMITER);
        }
        formatted.append(LINE_DELIMITER);
        return formatted.toString();
    }
}
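As a minimal sketch of the single-argument format the runner expects (the tables are hypothetical and use Flink's built-in `datagen` and `blackhole` connectors; running it needs the Flink runtime that the container image provides):

```java
package com.github.streamshub.flink;

// Sketch of the single-argument contract: all statements travel in one string,
// plain statements are split on ';', and an EXECUTE STATEMENT SET BEGIN ... END;
// block is kept together as a single unit.
public class SqlRunnerExample {
    public static void main(String[] args) throws Exception {
        String statements =
                "SET 'pipeline.name' = 'sql-runner-example';"
                        + "CREATE TABLE src (n INT) WITH ('connector' = 'datagen');"
                        + "CREATE TABLE snk (n INT) WITH ('connector' = 'blackhole');"
                        + "EXECUTE STATEMENT SET BEGIN "
                        + "INSERT INTO snk SELECT n FROM src; "
                        + "END;";

        // Equivalent to what the FlinkDeployment passes as <SQL_STATEMENTS>.
        SqlRunner.main(new String[] {statements});
    }
}
```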
9 changes: 9 additions & 0 deletions pom.xml
@@ -15,9 +15,18 @@

        <!-- Maven plugin versions -->
        <maven.compiler.version>3.10.1</maven.compiler.version>

        <!-- Project dependency versions -->
        <flink.version>1.19.1</flink.version>
        <kafka.version>3.7.0</kafka.version>
        <slf4j.version>1.7.36</slf4j.version>
        <fabric8.kubernetes-client.version>6.13.0</fabric8.kubernetes-client.version>
        <flink.avro.confluent.registry.version>1.19.1</flink.avro.confluent.registry.version>
        <flink.kafka.connector.version>3.2.0-1.19</flink.kafka.connector.version>
    </properties>

    <modules>
        <module>flink-sql-runner</module>
    </modules>

    <build>