Skip to content

Commit

Permalink
Don't emit wide concatenation assignments from DFG
Browse files Browse the repository at this point in the history
When converting DFGs back to ASTs, try not to emit a wide concatenation
as the root of an expression tree. That is, instead of:

foo = {l, r};

generate the following (recursively, if 'l' or 'r' are themselves
concatenations), with the right indices, iff the concatenation straddles
a wide word boundary:

foo[_:_] = r;
foo[_:_] = l;

This eliminates more wide temporaries.

Another 14% speedup on VeeR EH2 high_perf. Also brings the predicted
stack size from 8M to 32k.
  • Loading branch information
gezalore committed Nov 9, 2024
1 parent f073b27 commit 9aef6a6
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 38 deletions.
117 changes: 81 additions & 36 deletions src/V3DfgDfgToAst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class DfgToAstVisitor final : DfgVisitor {
// Map from an AstVar, to the canonical AstVar that can be substituted for that AstVar
std::unordered_map<AstVar*, AstVar*> m_canonVars;
V3UniqueNames m_tmpNames{"__VdfgTmp"}; // For generating temporary names
const bool m_lastInvocation; // Final run of DFG

// METHODS

Expand All @@ -151,49 +152,91 @@ class DfgToAstVisitor final : DfgVisitor {
return resultp;
}

void convertDfgVertexToAstNodeExprsRecursive(DfgVertex* vtxp,
                                             std::vector<AstNodeExpr*>& exprps,
                                             uint32_t offset) {
    // Render 'vtxp' into one or more expressions appended to 'exprps', least
    // significant part first. 'offset' is the bit position of the LSB of
    // 'vtxp' within the value being rendered (the top level call passes 0).
    DfgConcat* const catp = vtxp->cast<DfgConcat>();

    // A concatenation is only broken up if its bits land in more than one
    // VL_EDATASIZE-bit word, i.e.: the word holding its LSB differs from the
    // word holding its MSB.
    const bool straddlesWords
        = catp && (offset / VL_EDATASIZE) != ((offset + catp->width() - 1) / VL_EDATASIZE);

    // Anything else is rendered as a single expression
    if (!straddlesWords) {
        exprps.emplace_back(convertDfgVertexToAstNodeExpr(vtxp));
        return;
    }

    // Account for the split, then emit the low (right hand) part at the same
    // offset, followed by the high (left hand) part just above it. Either
    // part might be split further if it is itself a straddling concatenation.
    ++m_ctx.m_resultSplits;
    convertDfgVertexToAstNodeExprsRecursive(catp->rhsp(), exprps, offset);
    const uint32_t upperOffset = offset + catp->rhsp()->width();
    convertDfgVertexToAstNodeExprsRecursive(catp->lhsp(), exprps, upperOffset);
}

std::vector<AstNodeExpr*> convertDfgVertexToAstNodeExprs(DfgVertex* vtxp) {
    // Render 'vtxp' as a list of expressions. The list has a single entry
    // unless the vertex is wider than VL_QUADSIZE and this is the final DFG
    // run, in which case concatenations might be emitted in parts (LSB part
    // first). Splitting is not attempted on earlier runs, as the next round
    // of DFG coalesces assignments.
    const bool attemptSplit = m_lastInvocation && vtxp->width() > VL_QUADSIZE;

    std::vector<AstNodeExpr*> partps;
    if (attemptSplit) {
        convertDfgVertexToAstNodeExprsRecursive(vtxp, partps, 0);
    } else {
        partps.emplace_back(convertDfgVertexToAstNodeExpr(vtxp));
    }
    return partps;
}

void addResultEquation(FileLine* flp, AstNodeExpr* lhsp, AstNodeExpr* rhsp) {
    // Append the continuous assignment 'lhsp = rhsp' to the module being
    // rebuilt, and count it in the "result equations" statistic.
    AstAssignW* const assignp = new AstAssignW{flp, lhsp, rhsp};
    m_modp->addStmtsp(assignp);
    ++m_ctx.m_resultEquations;
}

void convertVarDriver(const DfgVarPacked* dfgVarp) {
if (dfgVarp->isDrivenFullyByDfg()) {
// Whole variable is driven. Render driver and assign directly to whole variable.
FileLine* const flp = dfgVarp->driverFileLine(0);
AstVarRef* const lhsp = new AstVarRef{flp, dfgVarp->varp(), VAccess::WRITE};
AstNodeExpr* const rhsp = convertDfgVertexToAstNodeExpr(dfgVarp->source(0));
addResultEquation(flp, lhsp, rhsp);
} else {
// Variable is driven partially. Render each driver as a separate assignment.
dfgVarp->forEachSourceEdge([&](const DfgEdge& edge, size_t idx) {
UASSERT_OBJ(edge.sourcep(), dfgVarp, "Should have removed undriven sources");
// Render the rhs expression
AstNodeExpr* const rhsp = convertDfgVertexToAstNodeExpr(edge.sourcep());
// Create select LValue
FileLine* const flp = dfgVarp->driverFileLine(idx);
dfgVarp->forEachSourceEdge([&](const DfgEdge& edge, size_t idx) {
UASSERT_OBJ(edge.sourcep(), dfgVarp, "Should have removed undriven sources");
FileLine* const flp = dfgVarp->driverFileLine(idx);
uint32_t lsb = dfgVarp->driverLsb(idx);
// Render the rhs expression, possibly in parts if a concatenation
for (AstNodeExpr* const rhsp : convertDfgVertexToAstNodeExprs(edge.sourcep())) {
const uint32_t rWidth = static_cast<uint32_t>(rhsp->width());
UASSERT_OBJ(lsb < dfgVarp->width(), dfgVarp, "Inconsistent driver");
AstVarRef* const refp = new AstVarRef{flp, dfgVarp->varp(), VAccess::WRITE};
AstConst* const lsbp = new AstConst{flp, dfgVarp->driverLsb(idx)};
AstConst* const widthp = new AstConst{flp, edge.sourcep()->width()};
AstSel* const lhsp = new AstSel{flp, refp, lsbp, widthp};
// Add assignment of the value to the selected bits
addResultEquation(flp, lhsp, rhsp);
});
}
if (rWidth == dfgVarp->width()) {
UASSERT_OBJ(lsb == 0, dfgVarp, "Inconsistent driver");
addResultEquation(flp, refp, rhsp);
} else {
AstConst* const lsbp = new AstConst{flp, lsb};
AstConst* const widthp = new AstConst{flp, rWidth};
AstSel* const lhsp = new AstSel{flp, refp, lsbp, widthp};
addResultEquation(flp, lhsp, rhsp);
}
lsb += rWidth;
}
});
}

void convertArrayDiver(const DfgVarArray* dfgVarp) {
// Variable is driven partially. Assign from parts of the canonical var.
AstVar* const astVarp = dfgVarp->varp();
const uint32_t elemWidth = static_cast<uint32_t>(astVarp->dtypep()->subDTypep()->width());
dfgVarp->forEachSourceEdge([&](const DfgEdge& edge, size_t idx) {
UASSERT_OBJ(edge.sourcep(), dfgVarp, "Should have removed undriven sources");
// Render the rhs expression
AstNodeExpr* const rhsp = convertDfgVertexToAstNodeExpr(edge.sourcep());
// Create select LValue
FileLine* const flp = dfgVarp->driverFileLine(idx);
AstVarRef* const refp = new AstVarRef{flp, dfgVarp->varp(), VAccess::WRITE};
AstConst* const idxp = new AstConst{flp, dfgVarp->driverIndex(idx)};
AstArraySel* const lhsp = new AstArraySel{flp, refp, idxp};
// Add assignment of the value to the selected bits
addResultEquation(flp, lhsp, rhsp);
uint32_t lsb = 0;
// Render the rhs expression, possibly in parts if a concatenation
for (AstNodeExpr* const rhsp : convertDfgVertexToAstNodeExprs(edge.sourcep())) {
const uint32_t rWidth = static_cast<uint32_t>(rhsp->width());
UASSERT_OBJ(lsb < elemWidth, dfgVarp, "Inconsistent driver");
AstVarRef* const refp = new AstVarRef{flp, astVarp, VAccess::WRITE};
AstConst* const idxp = new AstConst{flp, dfgVarp->driverIndex(idx)};
AstArraySel* const aselp = new AstArraySel{flp, refp, idxp};
if (rWidth == elemWidth) {
UASSERT_OBJ(lsb == 0, dfgVarp, "Inconsistent driver");
addResultEquation(flp, aselp, rhsp);
} else {
AstConst* const lsbp = new AstConst{flp, lsb};
AstConst* const widthp = new AstConst{flp, rWidth};
AstSel* const lhsp = new AstSel{flp, aselp, lsbp, widthp};
addResultEquation(flp, lhsp, rhsp);
}
lsb += rWidth;
}
});
}

Expand Down Expand Up @@ -234,9 +277,10 @@ class DfgToAstVisitor final : DfgVisitor {
#include "V3Dfg__gen_dfg_to_ast.h"

// Constructor
explicit DfgToAstVisitor(DfgGraph& dfg, V3DfgOptimizationContext& ctx)
explicit DfgToAstVisitor(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool lastInvocation)
: m_modp{dfg.modulep()}
, m_ctx{ctx} {
, m_ctx{ctx}
, m_lastInvocation{lastInvocation} {
// Convert the graph back to combinational assignments

// The graph must have been regularized, so we only need to render assignments
Expand All @@ -256,11 +300,12 @@ class DfgToAstVisitor final : DfgVisitor {
}

public:
static AstModule* apply(DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
return DfgToAstVisitor{dfg, ctx}.m_modp;
static AstModule* apply(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool lastInvocation) {
return DfgToAstVisitor{dfg, ctx, lastInvocation}.m_modp;
}
};

AstModule* V3DfgPasses::dfgToAst(DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
return DfgToAstVisitor::apply(dfg, ctx);
AstModule* V3DfgPasses::dfgToAst(DfgGraph& dfg, V3DfgOptimizationContext& ctx,
bool lastInvocation) {
return DfgToAstVisitor::apply(dfg, ctx, lastInvocation);
}
2 changes: 1 addition & 1 deletion src/V3DfgOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label, bool la

// Convert back to Ast
if (dumpDfgLevel() >= 8) dfg->dumpDotFilePrefixed(ctx.prefix() + "whole-optimized");
AstModule* const resultModp = V3DfgPasses::dfgToAst(*dfg, ctx);
AstModule* const resultModp = V3DfgPasses::dfgToAst(*dfg, ctx, lastInvocation);
UASSERT_OBJ(resultModp == modp, modp, "Should be the same module");
}

Expand Down
1 change: 1 addition & 0 deletions src/V3DfgPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ V3DfgOptimizationContext::~V3DfgOptimizationContext() {
V3Stats::addStat(prefix + "Ast2Dfg, non-representable (var ref)", m_nonRepVarRef);
V3Stats::addStat(prefix + "Ast2Dfg, non-representable (width)", m_nonRepWidth);
V3Stats::addStat(prefix + "Dfg2Ast, result equations", m_resultEquations);
V3Stats::addStat(prefix + "Dfg2Ast, result splits", m_resultSplits);

// Print the collected patterns
if (v3Global.opt.stats()) {
Expand Down
3 changes: 2 additions & 1 deletion src/V3DfgPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class V3DfgOptimizationContext final {
VDouble0 m_nonRepVarRef; // Equations non-representable due to variable reference
VDouble0 m_nonRepWidth; // Equations non-representable due to width mismatch
VDouble0 m_resultEquations; // Number of result combinational equations
VDouble0 m_resultSplits; // Number of splits in result equations

V3DfgCseContext m_cseContext0{m_label + " 1st"};
V3DfgCseContext m_cseContext1{m_label + " 2nd"};
Expand Down Expand Up @@ -128,7 +129,7 @@ void optimize(DfgGraph&, V3DfgOptimizationContext&, bool lastInvocation) VL_MT_D

// Convert DfgGraph back into Ast, and insert converted graph back into its parent module.
// Returns the parent module.
AstModule* dfgToAst(DfgGraph&, V3DfgOptimizationContext&) VL_MT_DISABLED;
AstModule* dfgToAst(DfgGraph&, V3DfgOptimizationContext&, bool lastInvocation) VL_MT_DISABLED;

//===========================================================================
// Intermediate/internal operations
Expand Down
2 changes: 2 additions & 0 deletions test_regress/t/t_dfg_balance_cats.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@
r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0)
test.file_grep(test.stats,
r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1)
test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result splits\s+(\d+)', 0)
test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result splits\s+(\d+)', 62)

test.passes()

0 comments on commit 9aef6a6

Please sign in to comment.