Skip to content

Commit

Permalink
fix list objects ordering to follow AWS S3
Browse files Browse the repository at this point in the history
Signed-off-by: Utkarsh Srivastava <[email protected]>

add more tests for asserting the correctness of utf8 sorting

Signed-off-by: Utkarsh Srivastava <[email protected]>

fix order when markers are involved

Signed-off-by: Utkarsh Srivastava <[email protected]>
  • Loading branch information
tangledbytes committed Sep 5, 2024
1 parent 0d9d1f7 commit 0f7adb7
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 18 deletions.
30 changes: 12 additions & 18 deletions src/sdk/namespace_fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const stream_utils = require('../util/stream_utils');
const buffer_utils = require('../util/buffer_utils');
const size_utils = require('../util/size_utils');
const native_fs_utils = require('../util/native_fs_utils');
const js_utils = require('../util/js_utils');
const ChunkFS = require('../util/chunk_fs');
const LRUCache = require('../util/lru_cache');
const nb_native = require('../util/nb_native');
Expand Down Expand Up @@ -91,14 +92,14 @@ const XATTR_METADATA_IGNORE_LIST = [
];

/**
* sort_entries_by_name uses byte order instead of
* UTF-8 ordering to determine sort order.
* @param {fs.Dirent} a
* @param {fs.Dirent} b
* @returns {1|-1|0}
*/
function sort_entries_by_name(a, b) {
if (a.name < b.name) return -1;
if (a.name > b.name) return 1;
return 0;
return Buffer.compare(Buffer.from(a.name, 'utf8'), Buffer.from(b.name, 'utf8'));
}

function _get_version_id_by_stat({ino, mtimeNsBigint}) {
Expand Down Expand Up @@ -250,14 +251,6 @@ function is_sparse_file(stat) {
return (stat.blocks * 512 < stat.size);
}

/**
* @param {fs.Dirent} e
* @returns {string}
*/
function get_entry_name(e) {
return e.name;
}

/**
* @param {string} name
* @returns {fs.Dirent}
Expand Down Expand Up @@ -685,9 +678,10 @@ class NamespaceFS {
// Since versions are arranged next to latest object in the latest first order,
// no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as
// they are in order
if (results.length && r.key < results[results.length - 1].key &&
const r_key_buf = Buffer.from(r.key, 'utf8');
if (results.length && Buffer.compare(r_key_buf, Buffer.from(results[results.length - 1].key, 'utf8')) === -1 &&
!this._is_hidden_version_path(r.key)) {
pos = _.sortedLastIndexBy(results, r, a => a.key);
pos = js_utils.sortedLastIndexBy(results, curr => Buffer.from(curr.key, 'utf8').compare(r_key_buf) === -1);
} else {
pos = results.length;
}
Expand Down Expand Up @@ -717,7 +711,7 @@ class NamespaceFS {
const process_entry = async ent => {
// dbg.log0('process_entry', dir_key, ent.name);
if ((!ent.name.startsWith(prefix_ent) ||
ent.name < marker_curr ||
Buffer.compare(Buffer.from(ent.name, 'utf8'), Buffer.from(marker_curr, 'utf8')) === -1 ||
ent.name === this.get_bucket_tmpdir_name() ||
ent.name === config.NSFS_FOLDER_OBJECT_NAME) ||
this._is_hidden_version_path(ent.name)) {
Expand Down Expand Up @@ -783,10 +777,10 @@ class NamespaceFS {
{name: start_marker}
) + 1;
} else {
marker_index = _.sortedLastIndexBy(
sorted_entries,
make_named_dirent(marker_curr),
get_entry_name
const marker_curr_buf = Buffer.from(make_named_dirent(marker_curr).name, 'utf8');
marker_index = js_utils.sortedLastIndexBy(sorted_entries, curr =>
Buffer.from(curr.name, 'utf8').compare(marker_curr_buf) === -1 ||
Buffer.from(curr.name, 'utf8').compare(marker_curr_buf) === 0
);
}

Expand Down
79 changes: 79 additions & 0 deletions src/test/unit_tests/jest_tests/js_utils.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/* Copyright (C) 2024 NooBaa */
'use strict';

const { sortedLastIndexBy } = require("../../../util/js_utils");

describe('test js_utils.js', () => {
describe('sortedLastIndexBy', () => {
it('should correctly find position for number array', () => {
const test_table = [{
array: [1, 3, 4, 5],
target: 0,
expected_position: 0,
}, {
array: [1, 3, 4, 5],
target: 2,
expected_position: 1,
}, {
array: [1, 3, 4, 5],
target: 6,
expected_position: 4,
}];

for (const entry of test_table) {
expect(sortedLastIndexBy(entry.array, curr => curr < entry.target)).toBe(entry.expected_position);
}
});

it('should correctly find position for string array', () => {
const test_table = [{
array: ["a", "b", "c", "d"],
target: "A",
expected_position: 0,
}, {
array: ["a", "b", "d", "e"],
target: "c",
expected_position: 2,
}, {
array: ["a", "b", "c", "d"],
target: "z",
expected_position: 4,
}];

for (const entry of test_table) {
expect(sortedLastIndexBy(entry.array, curr => curr < entry.target)).toBe(entry.expected_position);
}
});

it('should correctly find position for utf8 string (buffer) array', () => {
// Editors might not render characters in this array properly but that's not a mistake,
// it's intentional to use such characters - sourced from:
// https://forum.moonwalkinc.com/t/determining-s3-listing-order/116
const array = ["file_ꦏ_1.txt", "file__2.txt", "file__3.txt", "file_𐎣_4.txt"];
const array_of_bufs = array.map(item => Buffer.from(item, 'utf8'));

const test_table = [{
array: array_of_bufs,
target: array_of_bufs[0],
expected_position: 0,
}, {
array: array_of_bufs,
target: array_of_bufs[2],
expected_position: 2,
}, {
array: array_of_bufs,
target: array_of_bufs[3],
expected_position: 3,
}, {
array: array_of_bufs,
target: Buffer.from("file_𐎣_40.txt", 'utf8'),
expected_position: 4,
}];

for (const entry of test_table) {
expect(sortedLastIndexBy(entry.array, curr => curr.compare(entry.target) === -1)).toBe(entry.expected_position);
}
});
});
});

49 changes: 49 additions & 0 deletions src/test/unit_tests/test_ns_list_objects.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,55 @@ function test_ns_list_objects(ns, object_sdk, bucket) {

});

mocha.describe('utf8 specific basic tests', function() {
// source of keys: https://forum.moonwalkinc.com/t/determining-s3-listing-order/116
const strange_utf8_keys = ["file_ꦏ_1.txt", "file__2.txt", "file__3.txt", "file_𐎣_4.txt"];

mocha.before(async function() {
await create_keys([
...strange_utf8_keys,
]);
});
mocha.after(async function() {
await delete_keys([
...strange_utf8_keys,
]);
});

mocha.it('verify byte-by-byte order sort for utf8 encoded keys', async function() {
const r = await ns.list_objects({ bucket }, object_sdk);
assert.deepStrictEqual(r.is_truncated, false);
assert.deepStrictEqual(r.common_prefixes, []);
assert.deepStrictEqual(r.objects.map(it => it.key), strange_utf8_keys);
});

mocha.it('verify byte-by-byte order sort for utf8 encoded keys with markers', async function() {
const test_table = [
{
limit: 1,
},
{
limit: 2,
},
{
limit: 3,
},
{
limit: 4,
},
{
limit: 5,
}
];
for (const test of test_table) {
const r = await truncated_listing({ bucket, limit: test.limit });
assert.deepStrictEqual(r.is_truncated, false);
assert.deepStrictEqual(r.common_prefixes, []);
assert.deepStrictEqual(r.objects.map(it => it.key), strange_utf8_keys);
}
});
});

mocha.describe('list objects - dirs', function() {

this.timeout(10 * 60 * 1000); // eslint-disable-line no-invalid-this
Expand Down
26 changes: 26 additions & 0 deletions src/util/js_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,31 @@ function omit_symbol(maybe_obj, sym) {
return _.omit(obj, sym);
}

/**
* sortedLastIndexBy is like lodash's sortedLastIndexBy except that
* instead of taking a iteratee, it takes a function `less` which should
* return `true` when `curr < value`.
* @template T
* @param {Array<T>} array
* @param {(curr: T) => Boolean} less
* @returns
*/
function sortedLastIndexBy(array, less) {
let low = 0;
let high = array === null ? low : array.length;

while (low < high) {
const mid = Math.floor(low + ((high - low) / 2));
if (less(array[mid])) {
low = mid + 1;
} else {
high = mid;
}
}

return high;
}

exports.self_bind = self_bind;
exports.array_push_all = array_push_all;
exports.array_push_keep_latest = array_push_keep_latest;
Expand All @@ -250,3 +275,4 @@ exports.inspect_lazy = inspect_lazy;
exports.make_array = make_array;
exports.map_get_or_create = map_get_or_create;
exports.omit_symbol = omit_symbol;
exports.sortedLastIndexBy = sortedLastIndexBy;

0 comments on commit 0f7adb7

Please sign in to comment.