diff --git a/.github/workflows/ledgerexporter.yml b/.github/workflows/ledgerexporter.yml index c80a367771..ac1e265582 100644 --- a/.github/workflows/ledgerexporter.yml +++ b/.github/workflows/ledgerexporter.yml @@ -13,9 +13,10 @@ jobs: CAPTIVE_CORE_DEBIAN_PKG_VERSION: 21.1.0-1921.b3aeb14cc.focal LEDGEREXPORTER_INTEGRATION_TESTS_ENABLED: "true" LEDGEREXPORTER_INTEGRATION_TESTS_CAPTIVE_CORE_BIN: /usr/bin/stellar-core - # this pins to a version of quickstart:testing that has the same version as LEDGEREXPORTER_INTEGRATION_TESTS_CAPTIVE_CORE_BIN + # this pins to a version of quickstart:testing that has the same version of core + # as specified on LEDGEREXPORTER_INTEGRATION_TESTS_CAPTIVE_CORE_BIN # this is the multi-arch index sha, get it by 'docker buildx imagetools inspect stellar/quickstart:testing' - LEDGEREXPORTER_INTEGRATION_TESTS_QUICKSTART_IMAGE: docker.io/stellar/quickstart:testing@sha256:03c6679f838a92b1eda4cd3a9e2bdee4c3586e278a138a0acf36a9bc99a0041f + LEDGEREXPORTER_INTEGRATION_TESTS_QUICKSTART_IMAGE: docker.io/stellar/quickstart:testing@sha256:5c8186f53cc98571749054dd782dce33b0aca2d1a622a7610362f7c15b79b1bf LEDGEREXPORTER_INTEGRATION_TESTS_QUICKSTART_IMAGE_PULL: "false" steps: - name: Install captive core diff --git a/services/horizon/CHANGELOG.md b/services/horizon/CHANGELOG.md index 501ad51847..cd5d8af57b 100644 --- a/services/horizon/CHANGELOG.md +++ b/services/horizon/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## Pending + +### Added + +- Reingest from pre-computed tx meta on remote cloud storage. ([4911](https://github.com/stellar/go/issues/4911)), ([5374](https://github.com/stellar/go/pull/5374)) + - Configure horizon reingestion to obtain ledger tx meta in pre-computed files from a Google Cloud Storage(GCS) location. 
+ - Using this option will no longer require a captive core binary be present and it no longer runs a captive core sub-process, instead obtaining the tx meta from the GCS backend. + - Horizon supports this new feature with two new parameters `ledgerbackend` and `datastore-config` on the `reingest` command. Refer to [Reingestion README](./internal/ingest/README.md#reingestion). + + + ## 2.31.0 ### Breaking Changes diff --git a/services/horizon/cmd/db_test.go b/services/horizon/cmd/db_test.go index d3fbcaf345..93dd1ce119 100644 --- a/services/horizon/cmd/db_test.go +++ b/services/horizon/cmd/db_test.go @@ -52,7 +52,7 @@ func (s *DBCommandsTestSuite) TestDefaultParallelJobSizeForBufferedBackend() { "--network", "testnet", "--parallel-workers", "2", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", "2", "10"}) @@ -99,7 +99,7 @@ func (s *DBCommandsTestSuite) TestUsesParallelJobSizeWhenSetForBuffered() { "--parallel-workers", "2", "--parallel-job-size", "5", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", "2", "10"}) @@ -154,7 +154,7 @@ func (s *DBCommandsTestSuite) TestDbReingestAndFillGapsCmds() { args: []string{ "1", "100", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", "--network-passphrase", "passphrase", "--history-archive-urls", "[]", }, @@ -165,7 +165,7 @@ func (s *DBCommandsTestSuite) TestDbReingestAndFillGapsCmds() { args: []string{ "1", "100", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", }, expectError: true, errorMessage: "network-passphrase must be set", @@ -175,7 +175,7 @@ 
func (s *DBCommandsTestSuite) TestDbReingestAndFillGapsCmds() { args: []string{ "1", "100", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", "--network-passphrase", "passphrase", }, expectError: true, @@ -217,7 +217,7 @@ func (s *DBCommandsTestSuite) TestDbReingestAndFillGapsCmds() { "1", "100", "--network", "testnet", "--ledgerbackend", "datastore", - "--datastore-config", "../config.storagebackend.toml", + "--datastore-config", "../internal/ingest/testdata/config.storagebackend.toml", }, expectError: false, }, diff --git a/services/horizon/internal/ingest/README.md b/services/horizon/internal/ingest/README.md index a0874a0b43..12982b5047 100644 --- a/services/horizon/internal/ingest/README.md +++ b/services/horizon/internal/ingest/README.md @@ -140,8 +140,46 @@ This pauses the state machine for 10 seconds then tries again, in hopes that a n **Next state**: [`start`](#start-state) -# Ingestion -TODO +# Reingestion +Horizon supports running reingestion by executing the sub-command `db reingest range <from_ledger> <to_ledger>`, which will execute as an OS process and will be synchronous, exiting the process only after the complete reingestion range is finished or an error is encountered. + +By default this sub-command will attempt to use captive core configuration in the form of stellar core binary (`--stellar-core-binary-path`) and stellar core config (`--captive-core-config-path`) to obtain ledger tx meta from a stellar network to be ingested. + +The `db reingest range` sub-command can optionally be configured to consume pre-computed ledger tx meta files from a Google Cloud Storage (GCS) location instead of running captive core on the host machine. +Prerequisites: + - Have a GCS account. + - Run the [ledgerexporter] to publish ledger tx meta files to your GCS bucket location. 
+Run the `db reingest` sub-command, configured to import tx meta from your GCS bucket: + ```$ DATABASE_URL=<your_db_dsn> \ + NETWORK=testnet \ + stellar-horizon db reingest range \ + --parallel-workers 2 \ + --ledgerbackend "datastore" \ + --datastore-config "config.storagebackend.toml" \ + 100 200 + ``` +Note that even though we no longer need to provide the stellar-core binary or config file, we still need to provide network-related config, either with the convenience parameter `NETWORK=testnet|pubnet` or directly with `NETWORK_PASSPHRASE` and `HISTORY_ARCHIVE_URLS`. + +The `--datastore-config` must point to a new toml config file that will provide the necessary parameters for ingestion to work with remote GCS storage. + +Example config toml: +``` +# Datastore Configuration +[datastore_config] +# Specifies the type of datastore. +# Currently, only Google Cloud Storage (GCS) is supported. +type = "GCS" + +[datastore_config.params] +# The Google Cloud Storage bucket path for storing data, with optional subpaths for organization. +destination_bucket_path = "path/to/my/bucket" + +[datastore_config.schema] +# Configuration for data organization of the remote files +ledgers_per_file = 1 # Number of ledgers stored in each file. +files_per_partition = 64000 # Number of files per partition/directory. 
+ +``` # Range Preparation TODO: See `maybePrepareRange` diff --git a/services/horizon/config.storagebackend.toml b/services/horizon/internal/ingest/testdata/config.storagebackend.toml similarity index 100% rename from services/horizon/config.storagebackend.toml rename to services/horizon/internal/ingest/testdata/config.storagebackend.toml diff --git a/services/horizon/internal/integration/db_test.go b/services/horizon/internal/integration/db_test.go index 86a86a8055..b4716f10df 100644 --- a/services/horizon/internal/integration/db_test.go +++ b/services/horizon/internal/integration/db_test.go @@ -614,7 +614,7 @@ func TestReingestDatastore(t *testing.T) { "--network", "testnet", "--parallel-workers", "1", "--ledgerbackend", "datastore", - "--datastore-config", "../../config.storagebackend.toml", + "--datastore-config", "../ingest/testdata/config.storagebackend.toml", "997", "999"})