Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolves #135: DocLayer is not able to handle sparse arrays correctly. #165

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions src/QLExpression.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ ACTOR static Future<Void> doPathExpansionIfNotArray(PromiseStream<Reference<IRea
* - Case 3: Component after array root is numeric and treating it as field name. Ex: a.b.1.c, b is array
* of size 3 and treating "1" as field name. This can be expanded to a.b.0.1.c, a.b.1.1.c and a.b.2.1.c. As
* Doc Layer serializes numeric field name and array index same way, we have to pass this information.
*
* NOTE: MongoDB does NOT do nested array expansion. For exmaple if the query is a.c and a happens to be an array of
* size 2, the query will be expanded into a.0.c and a.1.c. Then if a[0] is again an array, it will NOT do the expansion
* again. That's why in case 3 we call doPathExpansionIfNotArray(), instead of the normal doPathExpansion().
*/
ACTOR static Future<Void> doArrayExpansion(PromiseStream<Reference<IReadContext>> promises,
Standalone<StringRef> queryPath,
Expand Down Expand Up @@ -126,6 +130,7 @@ ACTOR static Future<Void> doArrayExpansion(PromiseStream<Reference<IReadContext>

// Case 3 - Treat next component as field name.
if (nextComponentIsNumeric) {
// Do not do array expansion again.
futures.push_back(doPathExpansionIfNotArray(promises, StringRef(newPath), arrayElementPath, document,
expandLastArray, imputeNulls));
} else { // Case 1
Expand Down Expand Up @@ -169,6 +174,12 @@ ACTOR static Future<Void> doPathExpansion(PromiseStream<Reference<IReadContext>>
// expansion. So we also need to check that if the "most recent existing ancestor" is a primitive value, then
// its direct parent is also not an array. If the MREA is an object, then no check is necessary, since MongoDB
// switches back into null-imputing mode if that happens (even if we expanded an array right before).

// NOTE: since arrays are stored as sparse arrays, i.e. null values in the array will not be persisted at all
// it has to impute a null if:
// - last part of the path is numeric
// - it's immediate parent is an array
// - it's within the length of the array
if (imputeNulls && !v.present() && !arrayAncestor.present()) {
std::vector<Future<Optional<DataValue>>> futureAncestors;
futureAncestors.push_back(document->get(StringRef()));
Expand All @@ -177,14 +188,19 @@ ACTOR static Future<Void> doPathExpansion(PromiseStream<Reference<IReadContext>>

std::vector<Optional<DataValue>> ancestors = wait(getAll(futureAncestors));

for (auto j = ancestors.rbegin();
!(j == ancestors.rend()) /* yes, it can be !=, but MSVC has a bug, hence !(==)*/; ++j) {
if (j->present()) {
DVTypeCode type = j->get().getSortType();
for (int j = ancestors.size() - 1; j >= 0; --j) {
if (ancestors[j].present()) {
DVTypeCode type = ancestors[j].get().getSortType();
if (type == DVTypeCode::OBJECT ||
(type != DVTypeCode::ARRAY &&
(j + 1 == ancestors.rend() || (j + 1)->get().getSortType() != DVTypeCode::ARRAY))) {
(j == 0 || ancestors[j - 1].get().getSortType() != DVTypeCode::ARRAY))) {
promises.send(Reference<NullContext>(new NullContext()));
} else if (j == ancestors.size() - 2 && dk[dk.size() - 1][0] == (uint8_t)DVTypeCode::NUMBER &&
type == DVTypeCode::ARRAY) {
DataValue dv = DataValue::decode_key_part(dk[dk.size() - 1]);
if (dv.getDouble() < ancestors[j].get().getArraysize()) {
promises.send(Reference<NullContext>(new NullContext()));
}
}
break;
}
Expand Down
3 changes: 1 addition & 2 deletions test/correctness/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,8 @@ def random_id_value():
def random_value():
r = global_prng.random()
while True:
# TODO re-enable None value generating
# if (r < 0.1) and generator_options.test_nulls:
# val = None
# val = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this patch, is it time to re-enable None value tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do in the PR that adds deletion tests

if (r < 0.2):
val = random_float()
elif (r < 0.4):
Expand Down
65 changes: 31 additions & 34 deletions test/correctness/mongo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,11 +727,11 @@ def process_update_operator_rename(self, key, update_expression):
def process_update_operator_set_on_insert(self, key, update_expression, new_doc=False):
# print "Update Operator: $setOnInsert ", key, update_expression
if new_doc:
self.data[key] = OrderedDict(self.data[key].items() + update_expression.items())
self.data[key] = OrderedDict(self.data[key].items() + deepcopy(update_expression.items()))

def process_update_operator_set(self, key, update_expression):
# print "Update Operator: $set ", update
self.data[key] = OrderedDict(self.data[key].items() + update_expression.items())
self.data[key] = OrderedDict(self.data[key].items() + deepcopy(update_expression.items()))

def process_update_operator_unset(self, key, update_expression):
# print "Update Operator: $unset ", update
Expand Down Expand Up @@ -835,11 +835,9 @@ def process_update_operator_pull_all(self, key, update_expression):
raise MongoModelException("Cannot apply $pullAll to a non-array field.", code=10142)

def evaluate(self, query, document):
if len(query) == 0:
# If we made it here, the evaluate function must NOT be called from the `find` function, and thus
# we can return false. Note in find() function, an empty query means maches all documents.
return False
acc = True
if len(query) == 0:
return len(document) == 0
for field in query.keys():
if field == '_id':
tmp = OrderedDict()
Expand Down Expand Up @@ -960,21 +958,15 @@ def has_operator_expressions(doc):

def process_update_operator(self, key, update, new_doc=False):
op_update = self.has_operator_expressions(update)
old_data = None

try:
old_data = deepcopy(self.data[key])
if op_update:
for k in update:
if k == '$setOnInsert':
self.mapUpdateOperator[k](key, update[k], new_doc=new_doc)
elif k in self.mapUpdateOperator:
self.mapUpdateOperator[k](key, update[k])
else:
self.replace(key, update)
except MongoModelException as e:
self.data[key] = old_data
raise e
old_data = deepcopy(self.data[key])
if op_update:
for k in update:
if k == '$setOnInsert':
self.mapUpdateOperator[k](key, update[k], new_doc=new_doc)
elif k in self.mapUpdateOperator:
self.mapUpdateOperator[k](key, update[k])
else:
self.replace(key, update)

def deep_transform_logical_operators(self, selector=None):
new_selector = {}
Expand Down Expand Up @@ -1124,26 +1116,27 @@ def update(self, query, update, upsert, multi):
raise MongoModelException('multi update only works with $ operators', code=10158)
if isOperatorUpdate:
self.validate_update_object(update)
if len(query) == 0:
return
key = query.keys()[0]
any = False
old_data = deepcopy(self.data)
n = 0
try:
if len(query) == 0:
# Update all existing docs. And since the query is empty, do NOT do upsert.
any = True
for k in self.data.keys():
n +=1
self.process_update_operator(k, update)
if not multi:
break
key = query.keys()[0]
for k, item in self.data.iteritems():
if evaluate(key, query[key], item, self.options):
any = True
n += 1
# print "Result: ", item
if len(update) > 0:
self.process_update_operator(k, update)
if not multi:
return
else:
self.replace(k, update)
if not multi:
return
self.process_update_operator(k, update)
if not multi:
break
if any:
for index in self.indexes:
if not index.inError:
Expand Down Expand Up @@ -1184,7 +1177,7 @@ def update(self, query, update, upsert, multi):
if "_id" in query:
update["_id"] = query["_id"]
self.insert(update)
else:
elif not any:
# mongoDB raise an exception for the '$setOnInsert' update operator even if the upsert is False
if '$setOnInsert' in update.keys() and len(update['$setOnInsert']) == 0:
raise MongoModelException(
Expand Down Expand Up @@ -1283,7 +1276,11 @@ def validate_and_build_entry(self, documents, first_build=False):
check_none_at_all=True,
check_none_next=True,
debug=False)
entry = entry + reduce((lambda acc, x: acc + x), map((lambda x: str(x)), _values), "")
if len(_values) == 0:
# empty array
entry = entry + '[]'
else:
entry = entry + reduce((lambda acc, x: acc + x), map((lambda x: str(x)), _values), "")
if entry in seen:
# print "[{}] in {} ? {}".format(entry, seen, entry in seen)
# print "Violating keys " + str(key)
Expand Down
1 change: 0 additions & 1 deletion test/correctness/smoke/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import pytest


@pytest.mark.xfail
def test_update_array_containing_none_value(fixture_collection):
collection = fixture_collection

Expand Down