Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype: accumulate attrset updates, perform k-way merge #11290

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 142 additions & 4 deletions src/libexpr/eval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -999,10 +999,6 @@ void EvalState::mkSingleDerivedPathString(
}


/* Create a thunk for the delayed computation of the given expression
in the given environment. But if the expression is a variable,
then look it up right away. This significantly reduces the number
of thunks allocated. */
Value * Expr::maybeThunk(EvalState & state, Env & env)
{
Value * v = state.allocValue();
Expand Down Expand Up @@ -1899,6 +1895,112 @@ void ExprOpImpl::eval(EvalState & state, Env & env, Value & v)

void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
{

UpdateQueue q;

e1->evalForUpdate(state, env, q);
e2->evalForUpdate(state, env, q);
Comment on lines +1901 to +1902
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
e1->evalForUpdate(state, env, q);
e2->evalForUpdate(state, env, q);
evalForUpdate(state, env, q);


// k-way merge

size_t capacity = 0;

struct BindingsCursor {
const Bindings * bindings;
Bindings::size_t offset;

const Attr & front() {
return (*bindings)[offset];
}
/**
* @return false if there are no more elements
*/
bool pop_front() {
offset++;
return offset < bindings->size();
}
};

// a nested queue where the key is equal to the first and lowest name
std::map<std::pair<Symbol, int>, BindingsCursor> next;

// first fill next
int bias = 0;
for (auto b : q) {
if (b->size() == 0) continue;
capacity += b->size();
next.insert_or_assign({b->begin()->name, bias}, BindingsCursor {b, 0});

// The `//` operator is "right-biased", which means `{ a = "L"; } // { a = "R"; }`
// will result in `{ a = "R"; }`.
// This means that we want to find the right most value first, so that
// we can then ignore the earlier attributes of the same name.
// Decreasing the bias integer achieves this.
bias--;
}

auto r = state.allocBindings(capacity);

{
auto it = state.nrMultiConcats.find(capacity);
if (it != state.nrMultiConcats.end()) {
it->second++;
} else {
state.nrMultiConcats[capacity] = 1;
}
}

// std::cerr << "capacity: " << r->capacity() << std::endl;

// Consume the lowest item until empty.
// This changes the order of next, so we need to reinsert it each time.

// The last inserted name, so that we can easily ignore all the values that aren't right-most in the sequence of `//` operations.
// Symbol() is a _null object_ that doesn't match any real symbols.
Symbol lastInsertedName;
while (true) {
auto cur = next.begin();
if (cur == next.end()) break;

// std::cerr << "size: " << r->size() << std::endl;

// std::cerr << "processing attr: " << state.symbols[cur->first] << std::endl;

// Grow `r`
if (lastInsertedName != cur->first.first) {
r->push_back(cur->second.front());
lastInsertedName = cur->first.first;
}

// Shrink `next`
int bias = cur->first.second;

if (cur->second.pop_front()) {
std::pair<Symbol, int> key = {cur->second.front().name, bias};
next.emplace(key, std::move(cur->second));
}
next.erase(cur);
}

// TODO check the size of the bindings. If significantly below capacity, shrink it.

if (capacity > 0) {
auto capacityFraction = (100 * r->size()) / capacity;
auto it = state.nrMultiConcatCapacityPercent.find(capacityFraction);
auto wastedAttrs = capacity - r->size();
if (it != state.nrMultiConcatCapacityPercent.end()) {
it->second += wastedAttrs;
} else {
state.nrMultiConcatCapacityPercent[capacityFraction] = wastedAttrs;
}
state.nrMultiConcatCapacityTotalWaste += wastedAttrs;
}


v.mkAttrs(r);


#if 0
Value v1, v2;
state.evalAttrs(env, e1, v1, pos, "in the left operand of the update (//) operator");
state.evalAttrs(env, e2, v2, pos, "in the right operand of the update (//) operator");
Expand Down Expand Up @@ -1932,6 +2034,22 @@ void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
v.mkAttrs(attrs.alreadySorted());

state.nrOpUpdateValuesCopied += v.attrs()->size();
#endif
}

void Expr::evalForUpdate(EvalState & state, Env & env, UpdateQueue & q)
{
Value vTmp;
eval(state, env, vTmp);
// TODO add pos and errorCtx params
state.forceAttrs(vTmp, noPos, "while evaluating an attribute set merge operand");
Comment on lines +2043 to +2045
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

evalAttrs?

q.push_back(vTmp.attrs());
}

void ExprOpUpdate::evalForUpdate(EvalState &state, Env &env, UpdateQueue &q)
{
e1->evalForUpdate(state, env, q);
e2->evalForUpdate(state, env, q);
}


Expand Down Expand Up @@ -2905,6 +3023,26 @@ void EvalState::printStatistics()
};
topObj["nrOpUpdates"] = nrOpUpdates;
topObj["nrOpUpdateValuesCopied"] = nrOpUpdateValuesCopied;
topObj["nrOpMultiUpdate"] = {};
for (auto [k, n] : nrMultiConcats)
topObj["nrsKWayUpdate"][fmt("%05i", k)] = n;
topObj["nrMultiConcatCapacityPercent"] = {};
for (auto [k, n] : nrMultiConcatCapacityPercent)
topObj["nrMultiConcatCapacityPercent"][fmt("%03i", k)] = n;

topObj["nrMultiConcatCapacityCumulativePercent"] = {};
{
auto c = 0;
for (auto [k, n] : nrMultiConcatCapacityPercent) {
c += n;
topObj["nrMultiConcatCapacityCumulativePercent"][fmt("%03i", k)] = c;
}
}
topObj["nrMultiConcatCapacityTotalWaste"] = nrMultiConcatCapacityTotalWaste;
#if HAVE_BOEHMGC
topObj["nrMultiConcatCapacityTotalWastePercent"] = nrMultiConcatCapacityTotalWaste * sizeof(Attr) * 100.0 / totalBytes;
#endif

topObj["nrThunks"] = nrThunks;
topObj["nrAvoided"] = nrAvoided;
topObj["nrLookups"] = nrLookups;
Expand Down
3 changes: 3 additions & 0 deletions src/libexpr/eval.hh
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,9 @@ private:
unsigned long nrOpUpdates = 0;
unsigned long nrOpUpdateValuesCopied = 0;
unsigned long nrListConcats = 0;
std::map<int, int> nrMultiConcats;
std::map<int, int> nrMultiConcatCapacityPercent;
unsigned long nrMultiConcatCapacityTotalWaste = 0;
unsigned long nrPrimOpCalls = 0;
unsigned long nrFunctionCalls = 0;

Expand Down
57 changes: 56 additions & 1 deletion src/libexpr/nixexpr.hh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <map>
#include <vector>

#include "gc-small-vector.hh"
#include "value.hh"
#include "symbol-table.hh"
#include "eval-error.hh"
Expand Down Expand Up @@ -74,6 +75,7 @@ typedef std::vector<AttrName> AttrPath;

std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath);

using UpdateQueue = SmallVector<const Bindings *, 64>;

/* Abstract syntax of Nix expressions. */

Expand All @@ -91,8 +93,25 @@ struct Expr
virtual ~Expr() { };
virtual void show(const SymbolTable & symbols, std::ostream & str) const;
virtual void bindVars(EvalState & es, const std::shared_ptr<const StaticEnv> & env);

/** Normal evaluation, implemented directly by all subclasses. */
virtual void eval(EvalState & state, Env & env, Value & v);

/**
* Create a thunk for the delayed computation of the given expression
* in the given environment. But if the expression is a variable,
* then look it up right away. This significantly reduces the number
* of thunks allocated.
*/
virtual Value * maybeThunk(EvalState & state, Env & env);

/**
* Only called when performing an attrset update: `//` or similar.
* Instead of writing to a Value &, this function writes to an UpdateQueue.
* This allows the expression to perform multiple updates in a delayed manner, gathering up all the updates before applying them.
*/
virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q);

virtual void setName(Symbol name);
virtual void setDocComment(DocComment docComment) { };
virtual PosIdx getPos() const { return noPos; }
Expand Down Expand Up @@ -430,9 +449,45 @@ MakeBinOp(ExprOpNEq, "!=")
MakeBinOp(ExprOpAnd, "&&")
MakeBinOp(ExprOpOr, "||")
MakeBinOp(ExprOpImpl, "->")
MakeBinOp(ExprOpUpdate, "//")
MakeBinOp(ExprOpConcatLists, "++")

struct ExprOpUpdate : Expr
{
PosIdx pos;
Expr *e1, *e2;
ExprOpUpdate(Expr * e1, Expr * e2)
: e1(e1)
, e2(e2){};
ExprOpUpdate(const PosIdx & pos, Expr * e1, Expr * e2)
: pos(pos)
, e1(e1)
, e2(e2){};
void show(const SymbolTable & symbols, std ::ostream & str) const override
{
str << "(";
e1->show(symbols, str);
str << " "
"//"
" ";
e2->show(symbols, str);
str << ")";
}
void bindVars(EvalState & es, const std ::shared_ptr<const StaticEnv> & env) override
{
e1->bindVars(es, env);
e2->bindVars(es, env);
}
void eval(EvalState & state, Env & env, Value & v) override;
PosIdx getPos() const override
{
return pos;
}

virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q) override;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only new code in this declaration. The rest is just an expansion of the MakeBinOp macro.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should make a MakeBinOpMembers macro that MakeBinOp this can both uses?


};


struct ExprConcatStrings : Expr
{
PosIdx pos;
Expand Down
Loading