diff --git a/README.md b/README.md index feab84a1..5f21797f 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,6 @@ Depending on your network and computer, startup time may take 3-5 minutes. Once ## Prerequisites Install Git (optional), Docker, Docker Compose. -Docker Desktop (or Orbstack) with Kubernetes enabled and helm CLI is required if you use helm-chart to deploy services. ## System Resource Requirements @@ -63,81 +62,21 @@ git clone git@github.com:apache/gravitino-playground.git cd gravitino-playground ``` -#### Docker - -##### Start +### Start ``` ./playground.sh docker start ``` -##### Check status +### Check status ```shell ./playground.sh docker status ``` -##### Stop playground +### Stop playground ```shell ./playground.sh docker stop ``` -#### Kubernetes - -Enable Kubernetes in Docker Desktop or Orbstack. - -In the project root directory, execute this command: - -``` -helm upgrade --install gravitino-playground ./helm-chart/ --create-namespace --namespace gravitino-playground --set projectRoot=$(pwd) -``` - -##### Start - -``` -./playground.sh k8s start -``` - -##### Check status -```shell -./playground.sh k8s status -``` - -##### Port Forwarding - -To access the pods or services at `localhost`, you need to do these steps: - -1. Log in to the Gravitino playground Trino pod using the following command: - -``` -TRINO_POD=$(kubectl get pods --namespace gravitino-playground -l app=trino -o jsonpath="{.items[0].metadata.name}") -kubectl exec $TRINO_POD -n gravitino-playground -it -- /bin/bash -``` -2. Log in to the Gravitino playground Spark pod using the following command: - -``` -SPARK_POD=$(kubectl get pods --namespace gravitino-playground -l app=spark -o jsonpath="{.items[0].metadata.name}") -kubectl exec $SPARK_POD -n gravitino-playground -it -- /bin/bash -``` - -3. Port-forward the Gravitino service to access it at `localhost:8090`. - -``` -kubectl port-forward svc/gravitino -n gravitino-playground 8090:8090 -``` - -4. Port-forward the Jupyter Notebook service to access it at `localhost:8888`. - -``` -kubectl port-forward svc/jupyternotebook -n gravitino-playground 8888:8888 -``` - -##### Stop playground -```shell -./playground.sh k8s stop -``` - - - - ## Experiencing Apache Gravitino with Trino SQL ### Using Trino CLI in Docker Container diff --git a/docker-compose.yaml b/docker-compose.yaml index be97d8d5..6020f085 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -65,9 +65,6 @@ services: - "8090:8090" - "9001:9001" container_name: playground-gravitino - environment: - - MYSQL_HOST_IP=mysql - - HIVE_HOST_IP=hive depends_on: ranger : condition: service_healthy @@ -160,10 +157,6 @@ services: entrypoint: /bin/bash /tmp/spark/init.sh environment: - HADOOP_USER_NAME=root - - GRAVITINO_HOST_IP=gravitino - - GRAVITINO_HOST_PORT=8090 - - HIVE_HOST_IP=hive - - TRINO_HOST_IP=trino ports: - "14040:4040" volumes: @@ -173,11 +166,6 @@ services: jupyter: image: jupyter/pyspark-notebook:spark-3.4.1 container_name: playground-jupyter - environment: - - GRAVITINO_HOST_IP=gravitino - - HIVE_HOST_IP=hive - - TRINO_HOST_IP=trino - - POSTGRES_HOST_IP=postgresql ports: - "18888:8888" volumes: diff --git a/healthcheck/gravitino-healthcheck.sh b/healthcheck/gravitino-healthcheck.sh index 48462b39..a825e215 100755 --- a/healthcheck/gravitino-healthcheck.sh +++ b/healthcheck/gravitino-healthcheck.sh @@ -23,10 +23,8 @@ max_attempts=3 attempt=0 success=false -HOST_IP=${GRAVITINO_HOST_IP:-localhost} - while [ $attempt -lt $max_attempts ]; do - response=$(curl -X GET -H "Content-Type: application/json" http://${HOST_IP}:8090/api/version) + response=$(curl -X GET -H "Content-Type: application/json" http://127.0.0.1:8090/api/version) if echo "$response" | grep -q "\"code\":0"; then success=true diff --git a/healthcheck/hive-healthcheck.sh b/healthcheck/hive-healthcheck.sh index f921f451..18764957 100755 --- a/healthcheck/hive-healthcheck.sh +++ b/healthcheck/hive-healthcheck.sh @@ -19,12 +19,8 @@ # set -ex -# Set Hive connection details -HOST_IP=${HIVE_HOST_IP:-localhost} -HIVE_PORT="10000" - # Attempt to connect to Hive using curl -curl -s -o /dev/null -w "%{http_code}" http://${HOST_IP}:${HIVE_PORT} +curl -s -o /dev/null -w "%{http_code}" http://localhost:10000 # Check the HTTP status code if [ $? -eq 0 ]; then diff --git a/healthcheck/mysql-healthcheck.sh b/healthcheck/mysql-healthcheck.sh index 83f18e6f..cd3b0669 100755 --- a/healthcheck/mysql-healthcheck.sh +++ b/healthcheck/mysql-healthcheck.sh @@ -19,8 +19,7 @@ # set -ex -HOST_IP=${MYSQL_HOST_IP:-localhost} -mysqladmin ping -h ${HOST_IP} -p${MYSQL_ROOT_PASSWORD} +mysqladmin ping -h localhost -p${MYSQL_ROOT_PASSWORD} if [ $? -eq 0 ]; then echo "MySQL container started successfully." exit 0 diff --git a/healthcheck/trino-healthcheck.sh b/healthcheck/trino-healthcheck.sh index a0750d8a..df741fc2 100755 --- a/healthcheck/trino-healthcheck.sh +++ b/healthcheck/trino-healthcheck.sh @@ -20,7 +20,7 @@ set -ex # Because trino-connector must first synchronize a default metalake from the Gravitino server -response=$(trino --server ${TRINO_HOST_IP}:8080 --execute "SHOW CATALOGS LIKE 'catalog_hive'") +response=$(trino --server localhost:8080 --execute "SHOW CATALOGS LIKE 'catalog_hive'") if echo "$response" | grep -q catalog_hive; then echo "Gravitino Trino connector has finished synchronizing metadata" else diff --git a/helm-chart/.helmignore b/helm-chart/.helmignore deleted file mode 100644 index d90886af..00000000 --- a/helm-chart/.helmignore +++ /dev/null @@ -1,29 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ - -# Ignore these directories because they are too large, we use local-path pv to mount it into Pod -init/*/data/ -init/*/packages/ - - diff --git a/helm-chart/Chart.yaml b/helm-chart/Chart.yaml deleted file mode 100644 index ef6fbb20..00000000 --- a/helm-chart/Chart.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v2 -name: gravitino-playground -description: A Helm chart for Gravitino Playground -type: application -version: 0.1.0 -appVersion: "1.0.0" -maintainers: - - name: Your Name - email: your.email@example.com \ No newline at end of file diff --git a/helm-chart/healthcheck b/helm-chart/healthcheck deleted file mode 120000 index 0e8d7ebf..00000000 --- a/helm-chart/healthcheck +++ /dev/null @@ -1 +0,0 @@ -../healthcheck \ No newline at end of file diff --git a/helm-chart/init b/helm-chart/init deleted file mode 120000 index d9db1a18..00000000 --- a/helm-chart/init +++ /dev/null @@ -1 +0,0 @@ -../init \ No newline at end of file diff --git a/helm-chart/templates/NOTES.txt b/helm-chart/templates/NOTES.txt deleted file mode 100644 index ed129d91..00000000 --- a/helm-chart/templates/NOTES.txt +++ /dev/null @@ -1,24 +0,0 @@ -1. Log in to the Gravitino playground Trino pod using the following command: - -``` -TRINO_POD=$(kubectl get pods --namespace gravitino-playground -l app=trino -o jsonpath="{.items[0].metadata.name}") -kubectl exec $TRINO_POD -n gravitino-playground -it -- /bin/bash -``` -2. Log in to the Gravitino playground Spark pod using the following command: - -``` -SPARK_POD=$(kubectl get pods --namespace gravitino-playground -l app=spark -o jsonpath="{.items[0].metadata.name}") -kubectl exec $SPARK_POD -n gravitino-playground -it -- /bin/bash -``` - -3. Port-forwarding Gravitino Service, so that you can access it at `localhost:8090`. - -``` -kubectl port-forward svc/gravitino -n gravitino-playground 8090:8090 -``` - -4. Port-forwarding Jupyter Notebook Service, so that you can access it at `localhost:8888`. - -``` -kubectl port-forward svc/jupyternotebook -n gravitino-playground 8888:8888 -``` \ No newline at end of file diff --git a/helm-chart/templates/_helpers.tpl b/helm-chart/templates/_helpers.tpl deleted file mode 100644 index e40aca5a..00000000 --- a/helm-chart/templates/_helpers.tpl +++ /dev/null @@ -1,62 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "gravitino-playground.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "gravitino-playground.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "gravitino-playground.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "gravitino-playground.labels" -}} -helm.sh/chart: {{ include "gravitino-playground.chart" . }} -{{ include "gravitino-playground.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "gravitino-playground.selectorLabels" -}} -app.kubernetes.io/name: {{ include "gravitino-playground.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "gravitino-playground.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "gravitino-playground.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} diff --git a/helm-chart/templates/gravitino.yaml b/helm-chart/templates/gravitino.yaml deleted file mode 100644 index dce35df7..00000000 --- a/helm-chart/templates/gravitino.yaml +++ /dev/null @@ -1,84 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.gravitino.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.gravitino.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.gravitino.serviceName }} - template: - metadata: - labels: - app: {{ .Values.gravitino.serviceName }} - spec: - containers: - - name: gravitino - image: "{{ .Values.gravitino.image.repository }}:{{ .Values.gravitino.image.tag }}" - ports: - - containerPort: 8090 - - containerPort: 9001 - command: ["/bin/bash", "/tmp/gravitino/init.sh"] - env: - {{- toYaml .Values.gravitino.env | nindent 12 }} - volumeMounts: - - name: gravitino-healthcheck-scripts - mountPath: /tmp/healthcheck - - name: gravitino-artifacts - mountPath: /tmp/gravitino - readinessProbe: - exec: - command: - - /bin/sh - - -c - - /tmp/healthcheck/gravitino-healthcheck.sh - initialDelaySeconds: 10 - periodSeconds: 5 - failureThreshold: 3 - resources: - {{- toYaml .Values.gravitino.resources | nindent 12 }} - initContainers: - - name: wait-for-hive - image: "{{ .Values.gravitino.image.repository }}:{{ .Values.gravitino.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"] - env: - {{- toYaml .Values.gravitino.env | nindent 12 }} - volumeMounts: - - name: gravitino-healthcheck-scripts - mountPath: /tmp/healthcheck - - name: wait-for-mysql - image: "{{ .Values.mysql.image.repository }}:{{ .Values.mysql.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/mysql-healthcheck.sh"] - env: - {{- toYaml .Values.mysql.env | nindent 12 }} - volumeMounts: - - name: gravitino-healthcheck-scripts - mountPath: /tmp/healthcheck - volumes: - - name: gravitino-artifacts - hostPath: - path: {{ printf "%s/init/gravitino/" .Values.projectRoot }} - type: DirectoryOrCreate - - name: gravitino-healthcheck-scripts - hostPath: - path: {{ printf "%s/healthcheck/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.gravitino.serviceName }} -spec: - selector: - app: {{ .Values.gravitino.serviceName }} - ports: - - port: 8090 - targetPort: 8090 - name: api - - port: 9001 - targetPort: 9001 - name: debug diff --git a/helm-chart/templates/hive.yaml b/helm-chart/templates/hive.yaml deleted file mode 100644 index e2df9653..00000000 --- a/helm-chart/templates/hive.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# templates/hive.yaml ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.hive.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.hive.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.hive.serviceName }} - template: - metadata: - labels: - app: {{ .Values.hive.serviceName }} - spec: - # we need this, otherwise, the hive hostname at location of spark table will - # be the pod name, not service name: - # Location|hdfs://hive-6c8fbfcf8f-482r4:9000/...| - # which cause pyspark unable to connect to hive - hostname: hive - containers: - - name: hive - image: "{{ .Values.hive.image.repository }}:{{ .Values.hive.image.tag }}" - ports: - - containerPort: 3306 - - containerPort: 9000 - - containerPort: 9083 - - containerPort: 10000 - - containerPort: 50070 - readinessProbe: - exec: - command: ["/tmp/check-status.sh"] - periodSeconds: 10 - timeoutSeconds: 60 - failureThreshold: 5 - env: - {{- toYaml .Values.hive.env | nindent 12 }} - volumeMounts: - - name: hive-artifacts - mountPath: /tmp/hive - resources: - {{- toYaml .Values.hive.resources | nindent 12 }} - command: ["/bin/bash", "/tmp/hive/init.sh"] - volumes: - - name: hive-artifacts - hostPath: - path: {{ printf "%s/init/hive/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.hive.serviceName }} -spec: - selector: - app: {{ .Values.hive.serviceName }} - ports: - - port: 3306 - targetPort: 3306 - name: mysql - - port: 9000 - targetPort: 9000 - name: hdfs - - port: 9083 - targetPort: 9083 - name: metastore - - port: 10000 - targetPort: 10000 - name: hiveserver2 - - port: 50070 - targetPort: 50070 - name: namenode diff --git a/helm-chart/templates/jupyter.yaml b/helm-chart/templates/jupyter.yaml deleted file mode 100644 index 7322e246..00000000 --- a/helm-chart/templates/jupyter.yaml +++ /dev/null @@ -1,80 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.jupyter.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.jupyter.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.jupyter.serviceName }} - template: - metadata: - labels: - app: {{ .Values.jupyter.serviceName }} - spec: - containers: - - name: jupyter - image: "{{ .Values.jupyter.image.repository }}:{{ .Values.jupyter.image.tag }}" - ports: - - containerPort: 8888 - command: ["/bin/bash", "/tmp/gravitino/init.sh"] - securityContext: - privileged: true - runAsUser: 0 - env: - {{- toYaml .Values.jupyter.env | nindent 12 }} - volumeMounts: - - name: jupyter-artifacts - mountPath: /tmp/gravitino - - name: jupyter-healthcheck-scripts - mountPath: /tmp/healthcheck - resources: - {{- toYaml .Values.jupyter.resources | nindent 12 }} - initContainers: - - name: wait-for-gravitino - image: "{{ .Values.jupyter.image.repository }}:{{ .Values.jupyter.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/gravitino-healthcheck.sh"] - env: - {{- toYaml .Values.jupyter.env | nindent 12 }} - volumeMounts: - - name: jupyter-healthcheck-scripts - mountPath: /tmp/healthcheck - - name: wait-for-hive - image: "{{ .Values.hive.image.repository }}:{{ .Values.hive.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"] - env: - {{- toYaml .Values.jupyter.env | nindent 12 }} - volumeMounts: - - name: jupyter-healthcheck-scripts - mountPath: /tmp/healthcheck - - name: wait-for-trino - image: "{{ .Values.trino.image.repository }}:{{ .Values.trino.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/trino-healthcheck.sh"] - env: - {{- toYaml .Values.jupyter.env | nindent 12 }} - volumeMounts: - - name: jupyter-healthcheck-scripts - mountPath: /tmp/healthcheck - volumes: - - name: jupyter-artifacts - hostPath: - path: {{ printf "%s/init/jupyter/" .Values.projectRoot }} - type: DirectoryOrCreate - - name: jupyter-healthcheck-scripts - hostPath: - path: {{ printf "%s/healthcheck/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.jupyter.serviceName }} -spec: - selector: - app: {{ .Values.jupyter.serviceName }} - ports: - - port: 8888 - targetPort: 8888 \ No newline at end of file diff --git a/helm-chart/templates/mysql.yaml b/helm-chart/templates/mysql.yaml deleted file mode 100644 index b7efd76d..00000000 --- a/helm-chart/templates/mysql.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# templates/mysql.yaml ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.mysql.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.mysql.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.mysql.serviceName }} - template: - metadata: - labels: - app: {{ .Values.mysql.serviceName }} - spec: - containers: - - name: mysql - image: "{{ .Values.mysql.image.repository }}:{{ .Values.mysql.image.tag }}" - ports: - - containerPort: 3306 - env: - {{- toYaml .Values.mysql.env | nindent 12 }} - args: - - --default-authentication-plugin=mysql_native_password - - --character-set-server=utf8mb4 - - --collation-server=utf8mb4_general_ci - - --explicit_defaults_for_timestamp=true - - --lower_case_table_names=1 - volumeMounts: - - name: mysql-artifacts - mountPath: /docker-entrypoint-initdb.d - resources: - {{- toYaml .Values.mysql.resources | nindent 12 }} - volumes: - - name: mysql-artifacts - hostPath: - path: {{ printf "%s/init/mysql/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.mysql.serviceName }} -spec: - selector: - app: {{ .Values.mysql.serviceName }} - ports: - - port: 3306 - targetPort: 3306 \ No newline at end of file diff --git a/helm-chart/templates/postgresql.yaml b/helm-chart/templates/postgresql.yaml deleted file mode 100644 index 5088f80c..00000000 --- a/helm-chart/templates/postgresql.yaml +++ /dev/null @@ -1,46 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.postgresql.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.postgresql.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.postgresql.serviceName }} - template: - metadata: - labels: - app: {{ .Values.postgresql.serviceName }} - spec: - containers: - - name: postgresql - image: "{{ .Values.postgresql.image.repository }}:{{ .Values.postgresql.image.tag }}" - ports: - - containerPort: 5432 - env: - {{- toYaml .Values.postgresql.env | nindent 12 }} - volumeMounts: - - name: postgresql-artifacts - mountPath: /docker-entrypoint-initdb.d - resources: - {{- toYaml .Values.postgresql.resources | nindent 12 }} - volumes: - - name: postgresql-artifacts - hostPath: - path: {{ printf "%s/init/postgres/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.postgresql.serviceName }} -spec: - selector: - app: {{ .Values.postgresql.serviceName }} - ports: - - port: 5432 - targetPort: 5432 diff --git a/helm-chart/templates/spark.yaml b/helm-chart/templates/spark.yaml deleted file mode 100644 index 9f1610ad..00000000 --- a/helm-chart/templates/spark.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# templates/spark.yaml ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.spark.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.spark.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.spark.serviceName }} - template: - metadata: - labels: - app: {{ .Values.spark.serviceName }} - spec: - containers: - - name: spark - image: "{{ .Values.spark.image.repository }}:{{ .Values.spark.image.tag }}" - ports: - - containerPort: 4040 - command: ["/bin/bash", "/tmp/spark/init.sh"] - env: - {{- toYaml .Values.spark.env | nindent 12 }} - volumeMounts: - - name: spark-artifacts - mountPath: /tmp/spark - - name: spark-common-artifacts - mountPath: /tmp/common - resources: - {{- toYaml .Values.spark.resources | nindent 12 }} - volumes: - - name: spark-artifacts - hostPath: - path: {{ printf "%s/init/spark/" .Values.projectRoot }} - type: DirectoryOrCreate - - name: spark-common-artifacts - hostPath: - path: {{ printf "%s/init/common/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.spark.serviceName }} -spec: - selector: - app: {{ .Values.spark.serviceName }} - ports: - - port: 4040 - targetPort: 4040 \ No newline at end of file diff --git a/helm-chart/templates/tests/test-connection.yaml b/helm-chart/templates/tests/test-connection.yaml deleted file mode 100644 index 7ed158bd..00000000 --- a/helm-chart/templates/tests/test-connection.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: "{{ include "gravitino-playground.fullname" . }}-test-connection" - labels: - {{- include "gravitino-playground.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": test -spec: - containers: - - name: wget - image: busybox - command: ['wget'] - args: ['{{ include "gravitino-playground.fullname" . }}:{{ .Values.service.port }}'] - restartPolicy: Never diff --git a/helm-chart/templates/trino.yaml b/helm-chart/templates/trino.yaml deleted file mode 100644 index b6d46a99..00000000 --- a/helm-chart/templates/trino.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# templates/trino.yaml ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.trino.serviceName }} - namespace: {{ .Values.global.namespace }} - labels: - app: {{ .Values.trino.serviceName }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ .Values.trino.serviceName }} - template: - metadata: - labels: - app: {{ .Values.trino.serviceName }} - spec: - containers: - - name: trino - image: "{{ .Values.trino.image.repository }}:{{ .Values.trino.image.tag }}" - ports: - - containerPort: 8080 - command: ["/bin/bash", "/tmp/trino/init.sh"] - env: - {{- toYaml .Values.trino.env | nindent 12 }} - volumeMounts: - - name: trino-artifacts - mountPath: /tmp/trino - - name: trino-common-init-scripts - mountPath: /tmp/common - resources: - {{- toYaml .Values.trino.resources | nindent 12 }} - initContainers: - - name: wait-for-hive - image: "{{ .Values.hive.image.repository }}:{{ .Values.hive.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"] - env: - {{- toYaml .Values.trino.env | nindent 12 }} - volumeMounts: - - name: trino-healthcheck-scripts - mountPath: /tmp/healthcheck - - name: wait-for-gravitino - image: "{{ .Values.gravitino.image.repository }}:{{ .Values.gravitino.image.tag }}" - command: ["/bin/bash", "/tmp/healthcheck/gravitino-healthcheck.sh"] - env: - {{- toYaml .Values.trino.env | nindent 12 }} - volumeMounts: - - name: trino-healthcheck-scripts - mountPath: /tmp/healthcheck - volumes: - - name: trino-artifacts - hostPath: - path: {{ printf "%s/init/trino/" .Values.projectRoot }} - type: DirectoryOrCreate - - name: trino-common-init-scripts - hostPath: - path: {{ printf "%s/init/common/" .Values.projectRoot }} - type: DirectoryOrCreate - - name: trino-healthcheck-scripts - hostPath: - path: {{ printf "%s/healthcheck/" .Values.projectRoot }} - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Values.trino.serviceName }} -spec: - selector: - app: {{ .Values.trino.serviceName }} - ports: - - port: 8080 - targetPort: 8080 - name: http \ No newline at end of file diff --git a/helm-chart/values.yaml b/helm-chart/values.yaml deleted file mode 100644 index c87211f3..00000000 --- a/helm-chart/values.yaml +++ /dev/null @@ -1,212 +0,0 @@ -# Global settings -global: - storageClass: "" - namespace: gravitino-playground - projectRoot: "" - -# PostgreSQL settings -postgresql: - serviceName: &postgres_host_ip postgresql - image: - repository: postgres - tag: "13" - pullPolicy: IfNotPresent - resources: - limits: - cpu: 1 - memory: 500Mi - requests: - cpu: 200m - memory: 200Mi - env: - - name: POSTGRES_USER - value: postgres - - name: POSTGRES_PASSWORD - value: postgres - - name: ALLOW_IP_RANGE - value: 0.0.0.0/0 - -# MySQL settings -mysql: - serviceName: &mysql_host_ip mysql - image: - repository: mysql - tag: "8.0" - pullPolicy: IfNotPresent - resources: - limits: - cpu: 1 - memory: 500Mi - requests: - cpu: 200m - memory: 200Mi - env: - - name: MYSQL_HOST_IP - value: *mysql_host_ip - - name: MYSQL_ROOT_PASSWORD - value: mysql - - name: MYSQL_USER - value: mysql - - name: MYSQL_PASSWORD - value: mysql - - name: MYSQL_DATABASE - value: db - -# Hive settings -hive: - serviceName: &hive_host_ip hive - image: - repository: apache/gravitino-playground - tag: hive-2.7.3 - pullPolicy: IfNotPresent - resources: - limits: - cpu: 3 - memory: 3Gi - requests: - cpu: 1 - memory: 1Gi - env: - - name: HIVE_HOST_IP - value: *hive_host_ip - - name: HADOOP_USER_NAME - value: &hadoop_user_name root - -# Gravitino settings -gravitino: - serviceName: &gravitino_host_ip gravitino - image: - repository: apache/gravitino - tag: 0.7.0-incubating - pullPolicy: IfNotPresent - resources: - limits: - cpu: 500m - memory: 1.5Gi - requests: - cpu: 300m - memory: 1.5Gi - env: - - name: HIVE_HOST_IP - value: *hive_host_ip - - name: MYSQL_HOST_IP - value: *mysql_host_ip - -# Trino settings -trino: - serviceName: &trino_host_ip trino - image: - repository: apache/gravitino-playground - tag: trino-435-gravitino-0.7.0-incubating - pullPolicy: IfNotPresent - resources: - limits: - cpu: 1 - memory: 2Gi - requests: - cpu: 500m - memory: 1Gi - env: - - name: HADOOP_USER_NAME - value: *hadoop_user_name - - name: GRAVITINO_HOST_IP - value: *gravitino_host_ip - - name: GRAVITINO_HOST_PORT - value: "8090" - - name: GRAVITINO_METALAKE_NAME - value: metalake_demo - - name: HIVE_HOST_IP - value: *hive_host_ip - - name: MYSQL_HOST_IP - value: *mysql_host_ip - - name: POSTGRES_HOST_IP - value: *postgres_host_ip - -# Spark settings -spark: - serviceName: &spark_host_ip spark - image: - repository: spark - tag: 3.4.3-scala2.12-java11-python3-r-ubuntu - pullPolicy: IfNotPresent - resources: - limits: - cpu: 1 - memory: 1Gi - requests: - cpu: 500m - memory: 500Mi - env: - - name: HADOOP_USER_NAME - value: *hadoop_user_name - - name: GRAVITINO_HOST_IP - value: *gravitino_host_ip - - name: GRAVITINO_HOST_PORT - value: "8090" - - name: HIVE_HOST_IP - value: *hive_host_ip - - name: TRINO_HOST_IP - value: *trino_host_ip - -# Jupyter settings -jupyter: - # service can not be set to jupyter, because we will get this error in jupyter container: - # ValueError: invalid literal for int() with base 10: 'tcp://192.168.194.169:8888' - serviceName: jupyternotebook - image: - repository: jupyter/pyspark-notebook - tag: spark-3.4.1 - pullPolicy: IfNotPresent - resources: - limits: - cpu: 500m - memory: 1Gi - requests: - cpu: 200m - memory: 200Mi - env: - - name: GRAVITINO_HOST_IP - value: *gravitino_host_ip - - name: HIVE_HOST_IP - value: *hive_host_ip - - name: TRINO_HOST_IP - value: *trino_host_ip - - name: POSTGRES_HOST_IP - value: *postgres_host_ip - - name: SPARK_HOST_IP - value: *spark_host_ip - -# Service specific settings -service: - type: ClusterIP - # If you want to expose services externally, you can use LoadBalancer - # type: LoadBalancer - -# Ingress settings -ingress: - enabled: false - # If you want to use ingress, uncomment and adjust the following: - # annotations: {} - # hosts: - # - host: chart-example.local - # paths: [] - # tls: [] - -# Security settings -securityContext: - {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - -# Node selector -nodeSelector: {} - -# Tolerations -tolerations: [] - -# Affinity -affinity: {} diff --git a/init/common/init_metalake_catalog.sh b/init/common/init_metalake_catalog.sh index 9430f8d6..48c43f08 100644 --- a/init/common/init_metalake_catalog.sh +++ b/init/common/init_metalake_catalog.sh @@ -17,11 +17,11 @@ # under the License. # -response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo) +response=$(curl http://gravitino:8090/api/metalakes/metalake_demo) if echo "$response" | grep -q "\"code\":0"; then true else - response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"metalake_demo","comment":"comment","properties":{}}' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes) + response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"metalake_demo","comment":"comment","properties":{}}' http://gravitino:8090/api/metalakes) if echo "$response" | grep -q "\"code\":0"; then true # Placeholder, do nothing else @@ -30,12 +30,12 @@ else fi fi -response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_hive) +response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_hive) if echo "$response" | grep -q "\"code\":0"; then true else # Create Hive catalog for experience Gravitino service - response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive", "comment":"comment","properties":{"metastore.uris":"thrift://'${HIVE_HOST_IP}':9083" }}' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs) + response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive", "comment":"comment","properties":{"metastore.uris":"thrift://'${HIVE_HOST_IP}':9083" }}' http://gravitino:8090/api/metalakes/metalake_demo/catalogs) if echo "$response" | grep -q "\"code\":0"; then true # Placeholder, do nothing else @@ -44,12 +44,12 @@ else fi fi -response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_postgres) +response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_postgres) if echo "$response" | grep -q "\"code\":0"; then true else # Create Postgresql catalog for experience Gravitino service - response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_postgres", "type":"RELATIONAL", "provider":"jdbc-postgresql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:postgresql://postgresql/db", "jdbc-user":"postgres", "jdbc-password":"postgres", "jdbc-database":"db", "jdbc-driver": "org.postgresql.Driver" } }' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs) + response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_postgres", "type":"RELATIONAL", "provider":"jdbc-postgresql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:postgresql://postgresql/db", "jdbc-user":"postgres", "jdbc-password":"postgres", "jdbc-database":"db", "jdbc-driver": "org.postgresql.Driver" } }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs) if echo "$response" | grep -q "\"code\":0"; then true # Placeholder, do nothing else @@ -58,12 +58,12 @@ else fi fi -response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_mysql) +response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_mysql) if echo "$response" | grep -q "\"code\":0"; then true else # Create Mysql catalog for experience Gravitino service - response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_mysql", "type":"RELATIONAL", "provider":"jdbc-mysql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:mysql://'${MYSQL_HOST_IP}':3306", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver": "com.mysql.cj.jdbc.Driver" } }' http://${GRAVITINO_HOST_IP}:8090/api/metalakes/metalake_demo/catalogs) + response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_mysql", "type":"RELATIONAL", "provider":"jdbc-mysql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:mysql://'${MYSQL_HOST_IP}':3306", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver": "com.mysql.cj.jdbc.Driver" } }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs) if echo "$response" | grep -q "catalog_mysql"; then true # Placeholder, do nothing @@ -73,12 +73,12 @@ else fi fi -response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_iceberg) +response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_iceberg) if echo "$response" | grep -q "\"code\":0"; then true else # Create Iceberg catalog for experience Gravitino service - response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_iceberg", "type":"RELATIONAL", "provider":"lakehouse-iceberg", "comment":"comment", "properties":{ "uri":"jdbc:mysql://'${MYSQL_HOST_IP}':3306/db", "catalog-backend":"jdbc", "warehouse":"hdfs://'${HIVE_HOST_IP}':9000/user/iceberg/warehouse/", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver":"com.mysql.cj.jdbc.Driver"} }' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs) + response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_iceberg", "type":"RELATIONAL", "provider":"lakehouse-iceberg", "comment":"comment", "properties":{ "uri":"jdbc:mysql://'${MYSQL_HOST_IP}':3306/db", "catalog-backend":"jdbc", "warehouse":"hdfs://'${HIVE_HOST_IP}':9000/user/iceberg/warehouse/", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver":"com.mysql.cj.jdbc.Driver"} }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs) if echo "$response" | grep -q "\"code\":0"; then true # Placeholder, do nothing else diff --git a/init/gravitino/gravitino.conf b/init/gravitino/gravitino.conf index 4cafe216..a1555918 100755 --- a/init/gravitino/gravitino.conf +++ b/init/gravitino/gravitino.conf @@ -75,8 +75,8 @@ gravitino.auxService.iceberg-rest.host = 0.0.0.0 # Iceberg REST service http port gravitino.auxService.iceberg-rest.httpPort = 9001 gravitino.auxService.iceberg-rest.catalog-backend = jdbc -gravitino.auxService.iceberg-rest.uri = jdbc:mysql://__MYSQL_HOST_IP__:3306/db -gravitino.auxService.iceberg-rest.warehouse = hdfs://__HIVE_HOST_IP__:9000/user/iceberg/warehouse/ +gravitino.auxService.iceberg-rest.uri = jdbc:mysql://mysql:3306/db +gravitino.auxService.iceberg-rest.warehouse = hdfs://hive:9000/user/iceberg/warehouse/ gravitino.auxService.iceberg-rest.jdbc.user = mysql gravitino.auxService.iceberg-rest.jdbc.password = mysql gravitino.auxService.iceberg-rest.jdbc-driver = com.mysql.cj.jdbc.Driver diff --git a/init/gravitino/init.sh b/init/gravitino/init.sh index a427c6f7..2d5a8502 100644 --- a/init/gravitino/init.sh +++ b/init/gravitino/init.sh @@ -26,9 +26,6 @@ cp /root/gravitino/catalogs/jdbc-postgresql/libs/postgresql-42.2.7.jar /root/gra cp /root/gravitino/catalogs/jdbc-mysql/libs/mysql-connector-java-8.0.27.jar /root/gravitino/iceberg-rest-server/libs cp /tmp/gravitino/gravitino.conf /root/gravitino/conf -# set env var -sed -i 's/__MYSQL_HOST_IP__/'"$MYSQL_HOST_IP"'/g' /root/gravitino/conf/gravitino.conf -sed -i 's/__HIVE_HOST_IP__/'"$HIVE_HOST_IP"'/g' /root/gravitino/conf/gravitino.conf echo "Finish downloading" echo "Start the Gravitino Server" /bin/bash /root/gravitino/bin/gravitino.sh start & diff --git a/init/hive/init.sh b/init/hive/init.sh index 95088336..3e7baa2a 100644 --- a/init/hive/init.sh +++ b/init/hive/init.sh @@ -17,11 +17,10 @@ # under the License. # -# remove command line `tail -f /dev/null` in the `/usr/local/sbin/start.sh` -sed -i '$d' /usr/local/sbin/start.sh -sed -i '$d' /usr/local/sbin/start.sh +# remove command `tail -f /dev/null` in `/usr/local/sbin/start.sh`, so we can run subsequent commands +sed -i -E 's/tail -f \/dev\/null/\s/g' /usr/local/sbin/start.sh + cp /tmp/hive/core-site.xml /tmp/hadoop-conf -sed -i "s|hdfs://localhost:9000|hdfs://${HIVE_HOST_IP}:9000|g" /usr/local/hive/conf/hive-site.xml /bin/bash /usr/local/sbin/start.sh hdfs dfs -mkdir -p /user/gravitino hdfs dfs -mkdir -p /user/iceberg/warehouse diff --git a/init/jupyter/gravitino-fileset-example.ipynb b/init/jupyter/gravitino-fileset-example.ipynb index daa2a856..7f45ae8a 100644 --- a/init/jupyter/gravitino-fileset-example.ipynb +++ b/init/jupyter/gravitino-fileset-example.ipynb @@ -20,10 +20,8 @@ "from hdfs import InsecureClient\n", "import os\n", "\n", - "hive_host_ip=os.getenv('HIVE_HOST_IP')\n", - "\n", "# Create a HDFS connector client\n", - "hdfs_client = InsecureClient(f\"http://{hive_host_ip}:50070\", user='root')\n", + "hdfs_client = InsecureClient(\"http://hive:50070\", user='root')\n", "\n", "# List HDFS file and directories\n", "print(hdfs_client.list('/user/gravitino'))\n", @@ -52,10 +50,8 @@ "from gravitino import NameIdentifier, GravitinoAdminClient, GravitinoClient, Catalog, Fileset, FilesetChange\n", "import os \n", "\n", - "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n", - "\n", "# Create Gravitino admin client\n", - "gravitino_admin_client = GravitinoAdminClient(uri=f\"http://{gravitino_host_ip}:8090\")\n", + "gravitino_admin_client = GravitinoAdminClient(uri=\"http://gravitino:8090\")\n", "\n", "# Create metalake via Gravitino admin client\n", "metalake_name=\"default\"\n", @@ -73,7 +69,7 @@ "outputs": [], "source": [ "# Create Gravitino client\n", - "gravitino_client = GravitinoClient(uri=f\"http://{gravitino_host_ip}:8090\", metalake_name=metalake_name)" + "gravitino_client = GravitinoClient(uri=\"http://gravitino:8090\", metalake_name=metalake_name)" ] }, { @@ -131,7 +127,7 @@ "# Create schema entity via Gravition client\n", "schema_name=\"schema\"\n", "schema_path=\"/user/gravitino/\"+schema_name\n", - "schema_hdfs_path=f\"hdfs://{hive_host_ip}:9000{schema_path}\"\n", + "schema_hdfs_path=f\"hdfs://hive:9000{schema_path}\"\n", "\n", "catalog.as_schemas().create_schema(schema_name=schema_name, \n", " comment=\"\", \n", @@ -157,7 +153,7 @@ "# Create a managed type of Fileset\n", "managed_fileset_name=\"managed_fileset\"\n", "managed_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+managed_fileset_name\n", - "managed_fileset_hdfs_path=f\"hdfs://{hive_host_ip}:9000{managed_fileset_path}\"\n", + "managed_fileset_hdfs_path=f\"hdfs://hive:9000{managed_fileset_path}\"\n", "\n", "managed_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name, managed_fileset_name)\n", "catalog.as_fileset_catalog().create_fileset(ident=managed_fileset_ident,\n", @@ -184,7 +180,7 @@ "source": [ "external_fileset_name=\"external_fileset\"\n", "external_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+external_fileset_name\n", - "external_fileset_hdfs_path=f\"hdfs://{hive_host_ip}:9000{external_fileset_path}\"\n", + "external_fileset_hdfs_path=f\"hdfs://hive:9000{external_fileset_path}\"\n", "\n", "# Create a fileset path in HDFS in advance\n", "hdfs_client.makedirs(external_fileset_path)\n", diff --git a/init/jupyter/gravitino-spark-trino-example.ipynb b/init/jupyter/gravitino-spark-trino-example.ipynb index ac88c49c..4cbf078d 100644 --- a/init/jupyter/gravitino-spark-trino-example.ipynb +++ b/init/jupyter/gravitino-spark-trino-example.ipynb @@ -19,10 +19,6 @@ "import os\n", "from pyspark.sql import SparkSession\n", "\n", - "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n", - "hive_host_ip = os.getenv('HIVE_HOST_IP')\n", - "trino_host_ip = os.getenv('TRINO_HOST_IP')\n", - "spark_host_ip = os.getenv('SPARK_HOST_IP')\n", "spark_home = os.getenv('SPARK_HOME')\n", "os.environ['HADOOP_USER_NAME']=\"anonymous\"\n", "\n", @@ -30,14 +26,14 @@ " .appName(\"PySpark SQL Example\") \\\n", " .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n", " .config(\"spark.jars\", \"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,/tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar\") \\\n", - " .config(\"spark.sql.gravitino.uri\", f\"http://{gravitino_host_ip}:8090\") \\\n", + " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n", " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n", " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n", " .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n", " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n", - " .config(\"spark.sql.catalog.catalog_rest.uri\", f\"http://{gravitino_host_ip}:9001/iceberg/\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n", " .config(\"spark.locality.wait.node\", \"0\") \\\n", - " .config(\"spark.sql.warehouse.dir\", f\"hdfs://{hive_host_ip}:9000/user/hive/warehouse\") \\\n", + " .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n", " .enableHiveSupport() \\\n", " .getOrCreate()" ] @@ -107,7 +103,7 @@ "\n", "# Create a Trino connector client\n", "conn = connect(\n", - " host=trino_host_ip,\n", + " host=\"trino\",\n", " port=8080,\n", " user=\"admin\",\n", " catalog=\"catalog_hive\",\n", diff --git a/init/jupyter/gravitino-trino-example.ipynb b/init/jupyter/gravitino-trino-example.ipynb index 7130d0b3..edd4e510 100644 --- a/init/jupyter/gravitino-trino-example.ipynb +++ b/init/jupyter/gravitino-trino-example.ipynb @@ -31,11 +31,9 @@ "from trino.dbapi import connect\n", "import os\n", "\n", - "trino_host_ip=os.getenv('TRINO_HOST_IP')\n", - "\n", "# Create a Trino connector client\n", "conn = connect(\n", - " host=trino_host_ip,\n", + " host=\"trino\",\n", " port=8080,\n", " user=\"admin\",\n", " catalog=\"catalog_hive\",\n", @@ -52,7 +50,7 @@ "source": [ "## Prepare\n", "\n", - "Creates a schema named `catalog_hive.company` in Hive, with its location set to`hdfs://{hive_host_ip}:9000/user/hive/warehouse/company.db` on HDFS." + "Creates a schema named `catalog_hive.company` in Hive, with its location set to`hdfs://hive:9000/user/hive/warehouse/company.db` on HDFS." ] }, { @@ -64,11 +62,9 @@ "source": [ "import os\n", "\n", - "hive_host_ip=os.getenv('HIVE_HOST_IP')\n", - "\n", - "trino_client.execute(f\"\"\"\n", + "trino_client.execute(\"\"\"\n", "CREATE SCHEMA catalog_hive.company\n", - " WITH (location = 'hdfs://{hive_host_ip}:9000/user/hive/warehouse/company.db')\n", + " WITH (location = 'hdfs://hive:9000/user/hive/warehouse/company.db')\n", "\"\"\").fetchall()" ] }, diff --git a/init/jupyter/gravitino_llamaIndex_demo.ipynb b/init/jupyter/gravitino_llamaIndex_demo.ipynb index 3386a10d..ccd5d278 100644 --- a/init/jupyter/gravitino_llamaIndex_demo.ipynb +++ b/init/jupyter/gravitino_llamaIndex_demo.ipynb @@ -85,9 +85,7 @@ "from gravitino import NameIdentifier, GravitinoClient, Catalog, Fileset, GravitinoAdminClient\n", "import os \n", "\n", - "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n", - "\n", - "gravitino_url = f\"http://{gravitino_host_ip}:8090\"\n", + "gravitino_url = \"http://gravitino:8090\"\n", "metalake_name = \"metalake_demo\"\n", "\n", "catalog_name = \"catalog_fileset\"\n", @@ -234,9 +232,7 @@ "from sqlalchemy.sql.expression import select, text\n", "import os \n", "\n", - "trino_host_ip=os.getenv('TRINO_HOST_IP')\n", - "\n", - "trino_engine = create_engine(f\"trino://admin@{trino_host_ip}:8080/catalog_mysql/demo_llamaindex\")\n", + "trino_engine = create_engine(\"trino://admin@trino:8080/catalog_mysql/demo_llamaindex\")\n", "\n", "connection = trino_engine.connect();\n", "\n", diff --git a/init/spark/init.sh b/init/spark/init.sh index e3e7aba8..49b8f8b2 100644 --- a/init/spark/init.sh +++ b/init/spark/init.sh @@ -19,10 +19,6 @@ mkdir -p /opt/spark/conf cp /tmp/spark/spark-defaults.conf /opt/spark/conf -# inject host information -sed -i 's/__GRAVITINO_HOST_IP__/'"$GRAVITINO_HOST_IP"'/g' /opt/spark/conf/spark-defaults.conf -sed -i 's/__HIVE_HOST_IP__/'"$HIVE_HOST_IP"'/g' /opt/spark/conf/spark-defaults.conf - cp /tmp/spark/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar /opt/spark/jars/iceberg-spark-runtime-3.4_2.12-1.5.2.jar cp /tmp/spark/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar /opt/spark/jars/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar cp /tmp/spark/packages/mysql-connector-java-8.0.27.jar /opt/spark/jars/mysql-connector-java-8.0.27.jar diff --git a/init/spark/spark-defaults.conf b/init/spark/spark-defaults.conf index fdf1a2c3..446f865e 100644 --- a/init/spark/spark-defaults.conf +++ b/init/spark/spark-defaults.conf @@ -18,14 +18,14 @@ # spark.plugins org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin -spark.sql.gravitino.uri http://__GRAVITINO_HOST_IP__:8090 +spark.sql.gravitino.uri http://gravitino:8090 spark.sql.gravitino.metalake metalake_demo spark.sql.gravitino.enableIcebergSupport true spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions spark.sql.catalog.catalog_rest org.apache.iceberg.spark.SparkCatalog spark.sql.catalog.catalog_rest.type rest -spark.sql.catalog.catalog_rest.uri http://__GRAVITINO_HOST_IP__:9001/iceberg/ +spark.sql.catalog.catalog_rest.uri http://gravitino:9001/iceberg/ spark.locality.wait.node 0 -spark.sql.warehouse.dir hdfs://__HIVE_HOST_IP__:9000/user/hive/warehouse +spark.sql.warehouse.dir hdfs://hive:9000/user/hive/warehouse spark.sql.hive.metastore.jars path spark.sql.hive.metastore.jars.path file:///opt/spark/jars/* diff --git a/init/trino/init.sh b/init/trino/init.sh index a74f6090..7433c67e 100644 --- a/init/trino/init.sh +++ b/init/trino/init.sh @@ -30,7 +30,7 @@ while [ $counter -le 240 ]; do echo "Wait for the initialization of services" sleep 5 else - trino --execute "create schema catalog_hive.sales with (location = 'hdfs://${HIVE_HOST_IP}:9000/user/hive/warehouse/sales.db');" + trino --execute "create schema catalog_hive.sales with (location = 'hdfs://hive:9000/user/hive/warehouse/sales.db');" echo "Import the data of the Hive warehouse" trino ${playground_dir}/playground-${logSuffix}.log 2>&1 & - echo "Check log details: ${playground_dir}/playground-${logSuffix}.log" - ;; - esac + logSuffix=$(date +%Y%m%d%H%m%s) + if [ "$enableRanger" == true ]; then + docker-compose -f docker-compose.yaml -f docker-enable-ranger-hive-override.yaml up --detach + else + docker-compose up --detach + fi + + docker compose logs -f >${playground_dir}/playground-${logSuffix}.log 2>&1 & + echo "Check log details: ${playground_dir}/playground-${logSuffix}.log" } status() { - case "$runtime" in - k8s) - kubectl -n gravitino-playground get pods -o wide - ;; - docker) - docker-compose ps -a - ;; - esac + docker-compose ps -a } stop() { echo "INFO: Stopping the playground..." - case "$runtime" in - k8s) - helm uninstall --namespace gravitino-playground gravitino-playground - ;; - docker) - docker-compose down - if [ $? -eq 0 ]; then - echo "INFO: Playground stopped!" - fi - ;; - esac + docker-compose down + if [ $? -eq 0 ]; then + echo "INFO: Playground stopped!" + fi } -runtime="" - case "$1" in -k8s) - runtime="k8s"; - ;; -docker) - runtime="docker"; - ;; -*) - echo "ERROR: please specify which runtime you want to use, available runtime: [docker|k8s]" -esac - -case "$2" in start) - if [[ "$3" == "-y" ]]; then + if [[ "$2" == "-y" ]]; then input="y" else echo "The playground requires 2 CPU cores, 8 GB of RAM, and 25 GB of disk storage to operate efficiently." read -r -p "Confirm the requirement is available in your OS [Y/n]:" input fi - if [[ "$4" == "--enable-ranger" || "$3" == "--enable-ranger" ]]; then + if [[ "$2" == "--enable-ranger" || "$3" == "--enable-ranger" ]]; then enableRanger=true else enableRanger=false @@ -219,7 +136,7 @@ stop) stop ;; *) - echo "Usage: $0 [k8s|docker] [start | status | stop]" + echo "Usage: $0 [start | status | stop]" exit 1 ;; esac