diff --git a/README.md b/README.md index 217f1d1..e815788 100644 --- a/README.md +++ b/README.md @@ -147,4 +147,10 @@ etl s3/testbucket/records.json elastic/test2/records --target_host=localhost:920 ``` ### files / s3files -If the records being streamed contains `filename` and a `body` which is either a stream or a function that returns a stream, the individual `bodys` can be saved to disk (using `files` target) or to s3 (using `s3files` target). By default both `files` and `s3files` will only save the file if the filename does not exist. Overwrite can be enforced by setting `--no_skip=true` \ No newline at end of file +If the records being streamed contain `filename` and a `body` which is either a stream or a function that returns a stream, the individual `bodys` can be saved to disk (using `files` target) or to s3 (using `s3files` target). + +By default both `files` and `s3files` will only save the file if the filename does not exist. Both methods start scanning all the files in the target directory to see if they exist or not. This scan is done in the background (unless you specify `--target_scan_await=true`). You can also skip the scan with `--target_skip_scan=true`. While the scan is being performed (or if it's skipped), we use fs.stats or getHeadCommand on individual files to check if they exist. + +Overwrite can be enforced by specifying `--target_overwrite=true`. + +Files can be optionally gzipped by specifying `--target_gzip=true`. A `.gz` extension will be added to the filename and the content will be gzipped. 
diff --git a/output.js b/output.js index 516215b..fba3984 100644 --- a/output.js +++ b/output.js @@ -43,8 +43,6 @@ module.exports = async function(obj, argv) { } } - argv.target_gzip = dest && dest.match(/\.gz$/ig); - // Load custom config for the target_type or output const conf = nconf.get(dest) || {}; for (const key in conf) @@ -98,6 +96,14 @@ module.exports = async function(obj, argv) { stream = stream.pipe(etl.map(d => { Σ_in++; + if (d.filename && d.body && argv.target_gzip) { + d.filename += '.gz'; + const uncompressed = d.body; + d.body = async function() { + const body = await uncompressed(true); + return body.pipe(require('zlib').createGzip()); + }; + } if (typeof d.body == 'function') { d.buffer = async function() { let body = await d.body(true); diff --git a/package.json b/package.json index c3f8a52..80437b2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "etl-cli", - "version": "0.1.11", + "version": "0.1.12", "description": "", "main": "index.js", "scripts": { diff --git a/test/files-test.js b/test/files-test.js index b5e68b4..5ba79a1 100644 --- a/test/files-test.js +++ b/test/files-test.js @@ -92,4 +92,15 @@ tap.test('files', async t => { t.same( String(await data[0].buffer()), 'This is file B'); t.same( String(await data[1].buffer()), 'This is file A'); }); + + t.test('writing files with target_gzip', async () => { + const cmd = `etl files/${__dirname}/support/testfiles files/${tmpDir}/testfiles/ --silent --target_gzip=true`; + const res = await cli(cmd); + t.same(res, { 'Σ_in': 2, 'Σ_out': 2 }); + + // check one of the files to ensure it is gzipped + const buffer = fs.readFileSync(path.resolve(tmpDir, 'testfiles', 'fileB.txt.gz')); + const text = require('zlib').gunzipSync(buffer).toString(); + t.same(text, 'This is file B'); + }); }); \ No newline at end of file diff --git a/test/s3-test.js b/test/s3-test.js index 2fc0ffb..f5ee3c7 100644 --- a/test/s3-test.js +++ b/test/s3-test.js @@ -78,4 +78,19 @@ tap.test('s3files', async 
t => { t.same( String(await data[0].buffer()), 'This is file B'); t.same( String(await data[1].buffer()), 'This is file A'); }); + + t.test('uploading files again to s3 with target_gzip', async t => { + const cmd = `etl files/${__dirname}/support/testfiles s3files/${Bucket}/test/gzip --target_endpoint=http://localhost:9090 --target_forcePathStyle=true --target_gzip=true`; + const res = await cli(cmd); + t.same(res, { Σ_in: 2, Σ_out: 2 }, 'overwrite files that already exist'); + }); + + t.test('reading gzipped files from s3', async t => { + const cmd = `etl s3files/${Bucket}/test/gzip test --silent --source_endpoint=http://localhost:9090 --source_forcePathStyle=true`; + const res = await cli(cmd); + const buffer = await res.data[0].buffer(); + const text = require('zlib').gunzipSync(buffer).toString(); + t.same( res.data[0].filename, 'fileB.txt.gz'); + t.same( text, 'This is file B'); + }); }); \ No newline at end of file