From 5b7d4c7368c1664691f368d9be2d919b55cfa373 Mon Sep 17 00:00:00 2001
From: Arup Malakar
Date: Wed, 11 Dec 2024 13:39:31 -0800
Subject: [PATCH] Use docker compose file to simplify command line invocation

---
Reviewer notes (this section sits below the "---" separator and is not part
of the commit message):

* Dockerfile: spark-defaults.conf now sets the S3A filesystem implementation
  and TemporaryAWSCredentialsProvider, and the image ENTRYPOINT starts the
  Spark HistoryServer directly instead of "/bin/bash -c".
* README.md: the closing ``` fence after the command is kept as context
  instead of being deleted, so the shell snippet remains a terminated code
  block.
* README.md: an `export` step is added before `docker-compose up -d`. The
  AWS_* values shown in the snippet are assigned as plain shell variables
  (LOG_DIR presumably likewise, above this hunk - confirm), while
  docker-compose.yml reads them through ${...} interpolation, which only sees
  exported environment variables.
* The README hunk header and the diffstat are updated for those two changes.
  The blob ids on the "index" lines are carried over unchanged and were not
  regenerated.
* Leading whitespace of the README and YAML lines was reconstructed; verify
  it against the working tree (or apply with --ignore-whitespace).

 utilities/Spark_UI/Dockerfile         |  4 +++-
 utilities/Spark_UI/README.md          |  3 ++-
 utilities/Spark_UI/docker-compose.yml | 11 +++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 utilities/Spark_UI/docker-compose.yml

diff --git a/utilities/Spark_UI/Dockerfile b/utilities/Spark_UI/Dockerfile
index 9353b8d..7958c06 100644
--- a/utilities/Spark_UI/Dockerfile
+++ b/utilities/Spark_UI/Dockerfile
@@ -22,6 +22,8 @@ RUN rm /opt/spark/jars/jsr305-3.0.0.jar && \
 RUN echo $'\n\
 spark.eventLog.enabled true\n\
 spark.history.ui.port 18080\n\
+spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem\n\
+spark.hadoop.fs.s3a.aws.credentials.provider org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider\n\
 ' > /opt/spark/conf/spark-defaults.conf

 RUN echo $'\n\
@@ -34,4 +36,4 @@ log4j.appender.console.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %p %c{

 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk/

-ENTRYPOINT ["/bin/bash", "-c"]:
+ENTRYPOINT ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"]
diff --git a/utilities/Spark_UI/README.md b/utilities/Spark_UI/README.md
index 9c4b21a..3eaa9ce 100644
--- a/utilities/Spark_UI/README.md
+++ b/utilities/Spark_UI/README.md
@@ -47,8 +47,9 @@ If you prefer local access (not to have EC2 instance for Apache Spark history se
     $ AWS_ACCESS_KEY_ID="ASIAxxxxxxxxxxxx"
     $ AWS_SECRET_ACCESS_KEY="yyyyyyyyyyyyyyy"
     $ AWS_SESSION_TOKEN="zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
-    $ docker run -itd -e SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=$LOG_DIR -Dspark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID -Dspark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY -Dspark.hadoop.fs.s3a.session.token=$AWS_SESSION_TOKEN -Dspark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider" -p 18080:18080 glue/sparkui:latest "/opt/spark/bin/spark-class org.apache.spark.deploy.history.HistoryServer"
+    $ export LOG_DIR AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
+    $ docker-compose up -d
     ```

     These configuration parameters come from the [Hadoop-AWS Module](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html). You may need to add specific configuration based on your use case. For example: users in isolated regions will need to configure the `spark.hadoop.fs.s3a.endpoint`.

diff --git a/utilities/Spark_UI/docker-compose.yml b/utilities/Spark_UI/docker-compose.yml
new file mode 100644
index 0000000..f555e1b
--- /dev/null
+++ b/utilities/Spark_UI/docker-compose.yml
@@ -0,0 +1,11 @@
+version: '3'
+services:
+  spark-history:
+    build: .
+    ports:
+      - "18080:18080"
+    environment:
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+      - AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}
+      - SPARK_HISTORY_OPTS=-Dspark.history.fs.logDirectory=${LOG_DIR} -Dspark.hadoop.fs.s3a.access.key=${AWS_ACCESS_KEY_ID} -Dspark.hadoop.fs.s3a.secret.key=${AWS_SECRET_ACCESS_KEY} -Dspark.hadoop.fs.s3a.session.token=${AWS_SESSION_TOKEN}
\ No newline at end of file