Skip to content

Commit

Permalink
feat(backup): add new command dataset backup for server-side backups (#…
Browse files Browse the repository at this point in the history
…5571)

* feat(cli): new dataset backup enable/disable commands, add list and get backup command, refactor enable/disable commands, fix backup list CLI to use correct response type and error handling

* feat(cli): group commands into dataset-backup subcommand, add prompt selection for backup ID and dataset names, add progress bar

* feat(cli): Rename dataset backup subcommand to just backup, handle file names that contain a path segment to prevent archiving failure

* fix(cli): refactor input into its own function; fix progress tracking spinner, avoid using long names for temporary dir to prevent hitting max length limit, handle archive warning, enable compression of archived file by default

* fix(CLI): fix dataset backups to be importable through sanity import command

* fix(cli): refactor prompt usage in dataset backup command, add concurrency safe download of documents, refactor code into modules that can be easily tested, improve progress tracking for dataset backup, install progress-stream correctly, address review feedback, fix API usage for list backups

* fix(cli): address PR #5571 comments in dataset backup CLI, add node module namespace in imports, use interface in place of type, handle unhandled rejection

* fix(cli): in backup CLI, add common action to extract error from API response, use yargs to parse CLI flags

---------

Co-authored-by: Matt Craig <[email protected]>
  • Loading branch information
j33ty and codebymatt authored Feb 12, 2024
1 parent 6694945 commit f04c76e
Show file tree
Hide file tree
Showing 21 changed files with 1,097 additions and 3 deletions.
3 changes: 3 additions & 0 deletions packages/sanity/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,9 @@
"@types/tar-stream": "^3.1.3",
"@types/use-sync-external-store": "^0.0.5",
"@vitejs/plugin-react": "^4.2.0",
"archiver": "^6.0.1",
"arrify": "^1.0.1",
"async-mutex": "^0.4.1",
"chalk": "^4.1.2",
"chokidar": "^3.5.3",
"classnames": "^2.2.5",
Expand Down Expand Up @@ -310,6 +312,7 @@
"@testing-library/jest-dom": "^5.16.5",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.0.16",
"@types/archiver": "^6.0.2",
"@types/arrify": "^1.0.4",
"@types/connect-history-api-fallback": "^1.5.2",
"@types/lodash": "^4.14.149",
Expand Down
51 changes: 51 additions & 0 deletions packages/sanity/src/_internal/cli/actions/backup/archiveDir.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import {createWriteStream} from 'node:fs'
import zlib from 'node:zlib'

import {type ProgressData} from 'archiver'

import debug from './debug'

const archiver = require('archiver')

// ProgressCb is a callback that is called with the number of bytes processed so far.
// archiveDir invokes it on every archiver 'progress' event with `progress.fs.processedBytes`.
type ProgressCb = (processedBytes: number) => void

/**
 * archiveDir creates a gzipped tarball of the given directory and writes it to the given file path.
 *
 * @param tmpOutDir - Directory whose contents are added at the root of the archive.
 * @param outFilePath - Path of the archive file to create.
 * @param progressCb - Called on every archiver 'progress' event with the bytes processed so far.
 * @returns A promise that resolves once the destination file stream has closed, and rejects
 *   if the destination stream, the archiver, or finalization fails.
 */
function archiveDir(tmpOutDir: string, outFilePath: string, progressCb: ProgressCb): Promise<void> {
  return new Promise((resolve, reject) => {
    const archiveDestination = createWriteStream(outFilePath)
    archiveDestination.on('error', (err: Error) => {
      reject(err)
    })

    // Resolve only when the file has been closed, i.e. all archive data was flushed.
    // If the promise was already rejected, this call is a no-op.
    archiveDestination.on('close', () => {
      resolve()
    })

    const archive = archiver('tar', {
      gzip: true,
      gzipOptions: {level: zlib.constants.Z_DEFAULT_COMPRESSION},
    })

    // Errors from the archiver are not propagated through pipe(), so the destination
    // has to be closed explicitly to avoid leaking the open file handle.
    const failArchiving = (err: Error): void => {
      reject(err)
      archiveDestination.destroy()
    }

    archive.on('error', (err: Error) => {
      debug('Archiving errored!\n%s', err.stack)
      failArchiving(err)
    })

    // Catch warnings for non-blocking errors (stat failures and others)
    archive.on('warning', (err: Error) => {
      debug('Archive warning: %s', err.message)
    })

    archive.on('progress', (progress: ProgressData) => {
      progressCb(progress.fs.processedBytes)
    })

    // Pipe archive data to the file
    archive.pipe(archiveDestination)
    archive.directory(tmpOutDir, false)

    // finalize() returns a promise; without a handler a failure would surface as an
    // unhandled rejection instead of rejecting the promise returned to the caller.
    archive.finalize().catch(failArchiving)
  })
}

export default archiveDir
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import {type CliCommandContext} from '@sanity/cli'

import {defaultApiVersion} from '../../commands/backup/backupGroup'
import resolveApiClient from './resolveApiClient'

// maxBackupIdsShown is the maximum number of backup IDs to show in the prompt.
// It is sent as the `limit` query parameter when listing backups and is also
// mentioned in the prompt message.
// Higher numbers will cause the prompt to be slow.
const maxBackupIdsShown = 100

/**
 * Lists the backups of a dataset and asks the user to pick one of their IDs.
 *
 * @param context - CLI context; provides the prompt and is used to resolve the API client.
 * @param datasetName - Dataset whose backups are listed.
 * @returns The backup ID selected in the prompt.
 * @throws Error prefixed with "Failed to fetch backups for dataset" when the request
 *   (or the prompt) fails, and "No backups found" when the listing is empty.
 */
async function chooseBackupIdPrompt(
  context: CliCommandContext,
  datasetName: string,
): Promise<string> {
  const {prompt} = context
  const resolved = await resolveApiClient(context, datasetName, defaultApiVersion)
  const {projectId, token, client} = resolved

  try {
    // Ask for the most recent backups only; the API is expected to return them
    // sorted by creation date in descending order.
    const listing = await client.request({
      headers: {Authorization: `Bearer ${token}`},
      uri: `/projects/${projectId}/datasets/${datasetName}/backups`,
      query: {limit: maxBackupIdsShown.toString()},
    })

    const hasBackups = listing?.backups?.length > 0
    if (hasBackups) {
      const choices = listing.backups.map(({id}: {id: string}) => ({value: id}))

      return await prompt.single({
        message: `Select backup ID to use (only last ${maxBackupIdsShown} shown)`,
        type: 'list',
        choices,
      })
    }
  } catch (err) {
    throw new Error(`Failed to fetch backups for dataset ${datasetName}: ${err.message}`)
  }

  // Reached only when the request succeeded but returned no backups.
  throw new Error('No backups found')
}

export default chooseBackupIdPrompt
13 changes: 13 additions & 0 deletions packages/sanity/src/_internal/cli/actions/backup/cleanupTmpDir.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import rimraf from 'rimraf'

import debug from './debug'

/**
 * Removes the temporary directory via rimraf. Failures are only reported
 * through the debug logger; nothing is thrown or returned to the caller.
 *
 * @param tmpDir - Path of the directory to remove.
 */
function cleanupTmpDir(tmpDir: string): void {
  const reportFailure = (failure: Error | null | undefined): void => {
    if (!failure) {
      return
    }
    debug(`Error cleaning up temporary files: ${failure.message}`)
  }

  rimraf(tmpDir, reportFailure)
}

export default cleanupTmpDir
1 change: 1 addition & 0 deletions packages/sanity/src/_internal/cli/actions/backup/debug.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Shared debug logger for the backup actions, created with the `debug` package
// under the `sanity:backup` namespace.
export default require('debug')('sanity:backup')
54 changes: 54 additions & 0 deletions packages/sanity/src/_internal/cli/actions/backup/downloadAsset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import {createWriteStream} from 'node:fs'
import path from 'node:path'
import {pipeline} from 'node:stream/promises'

import {getIt} from 'get-it'
import {keepAlive, promise} from 'get-it/middleware'

import debug from './debug'
import withRetry from './withRetry'

// Connection timeout in milliseconds, passed to the requester as `timeout.connect`.
const CONNECTION_TIMEOUT = 15 * 1000 // 15 seconds
// Read timeout in milliseconds, passed to the requester as `timeout.socket`.
const READ_TIMEOUT = 3 * 60 * 1000 // 3 minutes

// get-it requester built with the keepAlive and promise middleware; downloadAsset awaits its result.
const request = getIt([keepAlive(), promise()])

/**
 * Downloads a single asset and stores it under the output directory.
 *
 * @param url - URL to download the asset from.
 * @param fileName - Name of the asset; only its base name is used for the file on disk.
 * @param fileType - Asset type, used by getAssetFilePath to pick the target sub-directory.
 * @param outDir - Directory the asset is written into.
 * @returns A promise that resolves once the asset has been completely written to disk.
 */
async function downloadAsset(
  url: string,
  fileName: string,
  fileType: string,
  outDir: string,
): Promise<void> {
  // File names that contain a path to file (e.g. sanity-storage/assets/file-name.tar.gz) fail when archive is
  // created due to missing parent dir (e.g. sanity-storage/assets), so we want to handle them by taking
  // the base name as file name.
  const normalizedFileName = path.basename(fileName)

  const assetFilePath = getAssetFilePath(normalizedFileName, fileType, outDir)
  await withRetry(async () => {
    const response = await request({
      url: url,
      maxRedirects: 5,
      timeout: {connect: CONNECTION_TIMEOUT, socket: READ_TIMEOUT},
      stream: true,
    })

    debug('Received asset %s with status code %d', normalizedFileName, response?.statusCode)

    // Wait until the body has been fully written. A bare pipe() would let this function
    // resolve while the file is still incomplete and would leave read/write stream errors
    // unhandled; awaiting pipeline() surfaces them so withRetry can see the failure.
    await pipeline(response.body, createWriteStream(assetFilePath))
  })
}

/**
 * Resolves where a downloaded asset is stored inside the output directory.
 *
 * @param fileName - Name of the file on disk.
 * @param fileType - 'image' maps to the `images` sub-directory, 'file' to `files`.
 * @param outDir - Base output directory.
 * @returns The joined path for image and file assets, or an empty string for any
 *   other type (e.g. a JSON document).
 */
function getAssetFilePath(fileName: string, fileType: string, outDir: string): string {
  switch (fileType) {
    case 'image':
      return path.join(outDir, 'images', fileName)
    case 'file':
      return path.join(outDir, 'files', fileName)
    default:
      // Not an asset file, so there is no asset path.
      return ''
  }
}

export default downloadAsset
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import {getIt, type MiddlewareResponse} from 'get-it'
import {keepAlive, promise} from 'get-it/middleware'

import debug from './debug'
import withRetry from './withRetry'

// Connection timeout in milliseconds, passed to the requester as `timeout.connect`.
const CONNECTION_TIMEOUT = 15 * 1000 // 15 seconds
// Read timeout in milliseconds, passed to the requester as `timeout.socket`.
const READ_TIMEOUT = 3 * 60 * 1000 // 3 minutes

// get-it requester built with the keepAlive and promise middleware; downloadDocument awaits its result via withRetry.
const request = getIt([keepAlive(), promise()])

/**
 * Fetches a backed up document from the given URL and returns the response body.
 * The request is executed through withRetry.
 *
 * @param url - URL of the document to download.
 * @returns The body of the response, as delivered by the requester.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function downloadDocument(url: string): Promise<any> {
  // A single download attempt; a fresh options object is built for every attempt.
  const fetchOnce = () =>
    request({
      url,
      maxRedirects: 5,
      timeout: {connect: CONNECTION_TIMEOUT, socket: READ_TIMEOUT},
    })

  const response = await withRetry<MiddlewareResponse>(fetchOnce)
  const statusCode = response?.statusCode

  debug('Received document from %s with status code %d', url, statusCode)

  return response.body
}

export default downloadDocument
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import {Readable} from 'node:stream'

import {type QueryParams, type SanityClient} from '@sanity/client'

// File describes a single entry in the `files` list of a backup page.
// PaginatedGetBackupStream pushes one File object per entry to its consumers.
type File = {
// Name of the backed up file.
name: string
// URL of the backed up file (presumably where it can be downloaded from; confirm against the consumer).
url: string
// Kind of the backed up file, as reported by the API.
type: string
}

// GetBackupResponse is the shape of one page returned by fetchNextBackupPage.
type GetBackupResponse = {
createdAt: string
// Copied to PaginatedGetBackupStream.totalFiles the first time a page is read.
totalFiles: number
// Files contained in this page.
files: File[]
// Cursor for the next page; a missing or empty value means there are no more pages.
nextCursor?: string
}

/**
 * Object-mode Readable that walks the paginated backup endpoint of a dataset and
 * emits one File object per backed up file. The stream ends when a page comes back
 * without a (non-empty) `nextCursor`, and is destroyed with the error if a page
 * cannot be fetched.
 */
class PaginatedGetBackupStream extends Readable {
  // Cursor of the next page to request; an empty string requests the first page.
  private cursor = ''
  private readonly client: SanityClient
  private readonly projectId: string
  private readonly datasetName: string
  private readonly backupId: string
  private readonly token: string
  // Total number of files in the backup; stays 0 until a page reporting a non-zero total is read.
  public totalFiles = 0

  /**
   * @param client - Client used to perform the API requests.
   * @param projectId - Project that owns the dataset.
   * @param datasetName - Dataset the backup belongs to.
   * @param backupId - ID of the backup to read.
   * @param token - Token sent as Bearer authorization with every request.
   */
  constructor(
    client: SanityClient,
    projectId: string,
    datasetName: string,
    backupId: string,
    token: string,
  ) {
    super({objectMode: true})
    this.client = client
    this.projectId = projectId
    this.datasetName = datasetName
    this.backupId = backupId
    this.token = token
  }

  /**
   * Fetches pages until at least one file has been pushed or the last page is reached.
   * The stream only asks for more data after something was pushed, so a page that is
   * empty but still has a next cursor must not end this call, or the stream would stall.
   */
  async _read(): Promise<void> {
    try {
      let pushedFiles = false
      while (!pushedFiles) {
        const data = await this.fetchNextBackupPage()

        // The consumer may have destroyed the stream while the request was in flight.
        if (this.destroyed) {
          return
        }

        // Set totalFiles when it's fetched for the first time
        if (this.totalFiles === 0) {
          this.totalFiles = data.totalFiles
        }

        // Advance the cursor before pushing, so any read triggered by a push
        // requests the following page rather than the current one again.
        const nextCursor = typeof data.nextCursor === 'string' ? data.nextCursor : ''
        if (nextCursor !== '') {
          this.cursor = nextCursor
        }

        data.files.forEach((file: File) => this.push(file))

        if (nextCursor === '') {
          // No more pages left to fetch.
          this.push(null)
          return
        }

        pushedFiles = data.files.length > 0
      }
    } catch (err) {
      this.destroy(err as Error)
    }
  }

  /**
   * fetchNextBackupPage fetches the next page of backed up files from the backup API.
   *
   * @returns The page identified by the current cursor (the first page when the cursor is empty).
   * @throws Error prefixed with "Downloading dataset backup failed: " when the request fails.
   */
  async fetchNextBackupPage(): Promise<GetBackupResponse> {
    const query: QueryParams = this.cursor === '' ? {} : {nextCursor: this.cursor}

    try {
      return await this.client.request({
        headers: {Authorization: `Bearer ${this.token}`},
        uri: `/projects/${this.projectId}/datasets/${this.datasetName}/backups/${this.backupId}`,
        query,
      })
    } catch (error) {
      // Errors may or may not carry a response body, so every level is read defensively;
      // an unguarded access would throw a TypeError here and hide the original failure.
      const apiError = error as
        | {statusCode?: number; message?: string; response?: {body?: {message?: string}}}
        | null
        | undefined
      const bodyMessage = apiError?.statusCode ? apiError.response?.body?.message : undefined

      // If no message can be extracted, print the whole error.
      const msg = bodyMessage ?? apiError?.message ?? String(error)
      throw new Error(`Downloading dataset backup failed: ${msg}`)
    }
  }
}

export {PaginatedGetBackupStream}
export type {File, GetBackupResponse}
35 changes: 35 additions & 0 deletions packages/sanity/src/_internal/cli/actions/backup/parseApiErr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// ApiErr is a type that represents an error returned by the API,
// normalized by parseApiErr into a status code and a message.
interface ApiErr {
// Taken from the error's `code` or `statusCode`; parseApiErr leaves it unset when neither is present.
statusCode: number
// Best-effort human readable message extracted from the error.
message: string
}

/**
 * parseApiErr is a function that attempts with the best effort to parse
 * an error returned by the API since different API endpoint may end up
 * returning different error structures.
 *
 * The status code is taken from `code`, then `statusCode`. The message is taken from
 * `message`, `statusMessage`, `response.body.message`, `response.data.message`, and
 * finally from a serialization of the whole error.
 *
 * @param err - Error value of unknown shape; may be null, undefined or a primitive.
 * @returns The normalized error. `statusCode` is left unset when none can be found;
 *   `message` is always a string.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any,@typescript-eslint/explicit-module-boundary-types
function parseApiErr(err: any): ApiErr {
  const apiErr = {} as ApiErr
  // Optional chaining throughout: a null/undefined error must not make the parser itself throw.
  if (err?.code) {
    apiErr.statusCode = err.code
  } else if (err?.statusCode) {
    apiErr.statusCode = err.statusCode
  }

  if (err?.message) {
    apiErr.message = err.message
  } else if (err?.statusMessage) {
    apiErr.message = err.statusMessage
  } else if (err?.response?.body?.message) {
    apiErr.message = err.response.body.message
  } else if (err?.response?.data?.message) {
    apiErr.message = err.response.data.message
  } else {
    // If no message can be extracted, print the whole error.
    // JSON.stringify throws on circular structures and returns undefined for values
    // it cannot serialize (e.g. undefined), so fall back to plain string conversion.
    let serialized: string | undefined
    try {
      serialized = JSON.stringify(err)
    } catch {
      serialized = undefined
    }
    apiErr.message = serialized ?? String(err)
  }

  return apiErr
}

export default parseApiErr
Loading

0 comments on commit f04c76e

Please sign in to comment.