From 74f40b8a11a32ba37194c30b1faa322cc6eba8b0 Mon Sep 17 00:00:00 2001
From: Ivan Yurchenko
Date: Sat, 30 Sep 2023 16:30:00 +0300
Subject: [PATCH] Add GCS demo

---
 demo/Makefile                        | 10 ++++
 demo/README.md                       | 32 +++++++++++
 demo/compose-gcs-fake-gcs-server.yml | 86 ++++++++++++++++++++++++++++
 demo/fake-gcs-server-configure.sh    | 24 ++++++++
 4 files changed, 152 insertions(+)
 create mode 100644 demo/compose-gcs-fake-gcs-server.yml
 create mode 100755 demo/fake-gcs-server-configure.sh

diff --git a/demo/Makefile b/demo/Makefile
index 21948702d..dfd30a0c6 100644
--- a/demo/Makefile
+++ b/demo/Makefile
@@ -107,11 +107,16 @@ run_s3_aws:
 run_s3_minio:
 	docker compose -f compose-s3-minio.yml up
 
+.PHONY: run_gcs_fake_gcs_server
+run_gcs_fake_gcs_server:
+	docker compose -f compose-gcs-fake-gcs-server.yml up
+
 .PHONY: clean
 clean:
 	docker compose -f compose-local-fs.yml down
 	docker compose -f compose-s3-aws.yml down
 	docker compose -f compose-s3-minio.yml down
+	docker compose -f compose-gcs-fake-gcs-server.yml down
 
 .PHONY: show_local_data
 show_local_data:
@@ -132,6 +137,11 @@ show_remote_data_s3_minio:
 	docker run --rm --network=host --entrypoint /usr/bin/bash quay.io/minio/mc \
 		-c "mc alias set mycloud http://localhost:9000 minioadmin minioadmin && mc ls --recursive mycloud/test-bucket | grep $(topic)"
 
+# Print the name of every object in test-bucket; prints nothing (instead of a jq error) when the listing has no items.
+.PHONY: show_remote_data_gcs_fake_gcs_server
+show_remote_data_gcs_fake_gcs_server:
+	curl -sS http://localhost:4443/storage/v1/b/test-bucket/o | jq -r '.items[]?.name'
+
 offset = 0
 consume = 10
 .PHONY: consume
diff --git a/demo/README.md b/demo/README.md
index d1ada7831..0d23139fa 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -10,6 +10,7 @@ To run the demos, you need:
 - Docker Compose
 - `make`
 - AWS S3 command line tool (optional)
+- `jq` (optional)
 
 ## Running
 
@@ -127,6 +128,37 @@ make consume
 
 You can also see the remote data in http://localhost:9090/browser/test-bucket (login: `minioadmin`, password: `minioadmin`).
 
+### fake-gcs-server as remote storage: `compose-gcs-fake-gcs-server.yml`
+
+This scenario uses `GcsStorage` with [fake-gcs-server](https://github.com/fsouza/fake-gcs-server) as the remote storage.
+
+```bash
+# Start the compose
+make run_gcs_fake_gcs_server
+
+# Create the topic with any variation
+make create_topic_by_size_ts
+# or
+# make create_topic_by_time_ts
+# or with TS disabled
+# make create_topic_*_no_ts
+
+# Fill the topic
+make fill_topic
+
+# See that segments are uploaded to the remote storage
+# (this may take several seconds)
+make show_remote_data_gcs_fake_gcs_server
+
+# Check that early segments are deleted
+# (completely or renamed with a `.deleted` suffix)
+# from the local storage (this may take several seconds)
+make show_local_data
+
+# Check the data is consumable
+make consume
+```
+
 ## Additional features
 
 ### Encryption
diff --git a/demo/compose-gcs-fake-gcs-server.yml b/demo/compose-gcs-fake-gcs-server.yml
new file mode 100644
index 000000000..00dbcf09b
--- /dev/null
+++ b/demo/compose-gcs-fake-gcs-server.yml
@@ -0,0 +1,86 @@
+##
+# Copyright 2023 Aiven Oy
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+version: '3.8'
+services:
+  zookeeper:
+    image: "confluentinc/cp-zookeeper:7.3.3"
+    ports:
+      - "2181:2181"
+    environment:
+      ZOOKEEPER_CLIENT_PORT: 2181
+
+  kafka:
+    image: "aivenoy/kafka-with-ts-plugin"
+    container_name: "kafka-ts"
+    depends_on:
+      - zookeeper
+      - fake-gcs-server
+    ports:
+      - "9092:9092"
+      - "7000:7000"  # Prometheus metrics
+    environment:
+      KAFKA_BROKER_ID: 0
+      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
+      KAFKA_LISTENERS: "PLAINTEXT://0.0.0.0:9092,BROKER://0.0.0.0:29092"
+      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://localhost:9092,BROKER://kafka:29092"
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,BROKER:PLAINTEXT"
+      KAFKA_INTER_BROKER_LISTENER_NAME: "BROKER"
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 1
+      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
+      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "false"  # quoted: environment values are strings, not YAML booleans
+      # Increase Tiered Storage log level
+      KAFKA_LOG4J_LOGGERS: "io.aiven.kafka.tieredstorage=DEBUG"
+      # Tweak retention checking
+      KAFKA_LOG_RETENTION_CHECK_INTERVAL_MS: 10000
+      # Enable Tiered Storage
+      KAFKA_REMOTE_LOG_STORAGE_SYSTEM_ENABLE: "true"  # quoted for the same reason as above
+      KAFKA_REMOTE_LOG_MANAGER_TASK_INTERVAL_MS: 5000
+      # Remote metadata manager
+      KAFKA_REMOTE_LOG_METADATA_MANAGER_CLASS_NAME: "org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManager"
+      KAFKA_REMOTE_LOG_METADATA_MANAGER_IMPL_PREFIX: "rlmm.config."
+      KAFKA_REMOTE_LOG_METADATA_MANAGER_LISTENER_NAME: "BROKER"
+      KAFKA_RLMM_CONFIG_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR: 1
+      # Remote storage manager
+      KAFKA_REMOTE_LOG_STORAGE_MANAGER_CLASS_PATH: "/tiered-storage-for-apache-kafka/core/*:/tiered-storage-for-apache-kafka/gcs/*"
+      KAFKA_REMOTE_LOG_STORAGE_MANAGER_CLASS_NAME: "io.aiven.kafka.tieredstorage.RemoteStorageManager"
+      KAFKA_REMOTE_LOG_STORAGE_MANAGER_IMPL_PREFIX: "rsm.config."
+      KAFKA_RSM_CONFIG_CHUNK_SIZE: 5242880  # 5MiB
+      KAFKA_RSM_CONFIG_CHUNK_CACHE_CLASS: "io.aiven.kafka.tieredstorage.chunkmanager.cache.InMemoryChunkCache"
+      KAFKA_RSM_CONFIG_CHUNK_CACHE_SIZE: -1
+      KAFKA_RSM_CONFIG_CUSTOM_METADATA_FIELDS_INCLUDE: "REMOTE_SIZE"
+      # Storage backend
+      KAFKA_RSM_CONFIG_KEY_PREFIX: "tiered-storage-demo/"
+      KAFKA_RSM_CONFIG_STORAGE_BACKEND_CLASS: "io.aiven.kafka.tieredstorage.storage.gcs.GcsStorage"
+      KAFKA_RSM_CONFIG_STORAGE_GCS_BUCKET_NAME: "test-bucket"
+      KAFKA_RSM_CONFIG_STORAGE_GCS_ENDPOINT_URL: "http://fake-gcs-server:4443"
+      KAFKA_RSM_CONFIG_STORAGE_GCS_CREDENTIALS_DEFAULT: "false"
+
+  fake-gcs-server:
+    image: fsouza/fake-gcs-server
+    ports:
+      - "4443:4443"
+    command: -scheme http  # plain HTTP, matching the http:// endpoint URL given to Kafka above
+
+  fake-gcs-server-configure:  # one-shot helper: runs the mounted setup script, never restarted
+    image: curlimages/curl
+    restart: "no"
+    depends_on:
+      - fake-gcs-server
+    volumes:
+      - "./fake-gcs-server-configure.sh:/fake-gcs-server-configure.sh"
+    command: /bin/sh /fake-gcs-server-configure.sh
diff --git a/demo/fake-gcs-server-configure.sh b/demo/fake-gcs-server-configure.sh
new file mode 100755
index 000000000..f60746ad8
--- /dev/null
+++ b/demo/fake-gcs-server-configure.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+##
+# Copyright 2023 Aiven Oy
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+
+# Set the server's externalUrl; retry while fake-gcs-server is still starting and refuses connections.
+curl --retry 5 --retry-delay 1 --retry-connrefused -X PUT "http://fake-gcs-server:4443/_internal/config" \
+  -H "Content-Type: application/json" -d '{"externalUrl": "http://fake-gcs-server:4443"}'
+
+# Create test-bucket in project test-project (URL quoted so the shell never treats '?' as a glob).
+curl --retry 5 --retry-delay 1 --retry-connrefused -X POST "http://fake-gcs-server:4443/storage/v1/b?project=test-project" \
+  -H "Content-Type: application/json" -d '{"name": "test-bucket"}'