diff --git a/common/kernel/nextpnr_types.h b/common/kernel/nextpnr_types.h index ec1ac11101..e23cd16a35 100644 --- a/common/kernel/nextpnr_types.h +++ b/common/kernel/nextpnr_types.h @@ -77,10 +77,20 @@ struct PortRef IdString port; }; +// Zero checking which works regardless if delay_t is floating or integer +inline bool is_zero_delay(delay_t delay) +{ + if constexpr (std::is_floating_point::value) { + return std::fpclassify(delay) == FP_ZERO; + } else { + return delay == 0; + } +} + // minimum and maximum delay struct DelayPair { - DelayPair() : min_delay(0), max_delay(0) {}; + DelayPair() : min_delay(0), max_delay(0){}; explicit DelayPair(delay_t delay) : min_delay(delay), max_delay(delay) {} DelayPair(delay_t min_delay, delay_t max_delay) : min_delay(min_delay), max_delay(max_delay) {} delay_t minDelay() const { return min_delay; } @@ -180,6 +190,20 @@ enum PortType PORT_INOUT = 2 }; +[[maybe_unused]] static const std::string portType_to_str(PortType typ) +{ + switch (typ) { + case PORT_IN: + return "PORT_IN"; + case PORT_OUT: + return "PORT_OUT"; + case PORT_INOUT: + return "PORT_INOUT"; + default: + NPNR_ASSERT_FALSE("Impossible PortType"); + } +} + struct PortInfo { IdString name; @@ -203,12 +227,50 @@ enum TimingPortClass TMG_IGNORE, // Asynchronous to all clocks, "don't care", and should be ignored (false path) for analysis }; +[[maybe_unused]] static const std::string timingPortClass_to_str(TimingPortClass tmg_class) +{ + switch (tmg_class) { + case TMG_CLOCK_INPUT: + return "TMG_CLOCK_INPUT"; + case TMG_GEN_CLOCK: + return "TMG_GEN_CLOCK"; + case TMG_REGISTER_INPUT: + return "TMG_REGISTER_INPUT"; + case TMG_REGISTER_OUTPUT: + return "TMG_REGISTER_OUTPUT"; + case TMG_COMB_INPUT: + return "TMG_COMB_INPUT"; + case TMG_COMB_OUTPUT: + return "TMG_COMB_OUTPUT"; + case TMG_STARTPOINT: + return "TMG_STARTPOINT"; + case TMG_ENDPOINT: + return "TMG_ENDPOINT"; + case TMG_IGNORE: + return "TMG_IGNORE"; + default: + NPNR_ASSERT_FALSE("Impossible TimingPortClass"); + } +} + enum ClockEdge { RISING_EDGE, FALLING_EDGE }; +[[maybe_unused]] static const std::string clockEdge_to_str(ClockEdge edge) +{ + switch (edge) { + case RISING_EDGE: + return "RISING_EDGE"; + case FALLING_EDGE: + return "FALLING_EDGE"; + default: + NPNR_ASSERT_FALSE("Impossible ClockEdge"); + } +} + struct TimingClockingInfo { IdString clock_port; // Port name of clock domain @@ -225,7 +287,7 @@ struct PseudoCell virtual bool getDelay(IdString fromPort, IdString toPort, DelayQuad &delay) const = 0; virtual TimingPortClass getPortTimingClass(IdString port, int &clockInfoCount) const = 0; virtual TimingClockingInfo getPortClockingInfo(IdString port, int index) const = 0; - virtual ~PseudoCell() {}; + virtual ~PseudoCell(){}; }; struct RegionPlug : PseudoCell @@ -345,13 +407,40 @@ struct CriticalPath // Segment type enum class Type { - CLK_TO_Q, // Clock-to-Q delay - SOURCE, // Delayless source - LOGIC, // Combinational logic delay - ROUTING, // Routing delay - SETUP // Setup time in sink + CLK_TO_CLK, // Clock to clock delay + CLK_SKEW, // Clock skew + CLK_TO_Q, // Clock-to-Q delay + SOURCE, // Delayless source + LOGIC, // Combinational logic delay + ROUTING, // Routing delay + SETUP, // Setup time in sink + HOLD // Hold time in sink }; + [[maybe_unused]] static const std::string type_to_str(Type typ) + { + switch (typ) { + case Type::CLK_TO_CLK: + return "clk-to-clk"; + case Type::CLK_SKEW: + return "clk-skew"; + case Type::CLK_TO_Q: + return "clk-to-q"; + case Type::SOURCE: + return "source"; + case Type::LOGIC: + return "logic"; + case Type::ROUTING: + return "routing"; + case Type::SETUP: + return "setup"; + case Type::HOLD: + return "hold"; + default: + NPNR_ASSERT_FALSE("Impossible Segment::Type"); + } + } + // Type Type type; // Net name (routing only) @@ -361,17 +450,15 @@ struct CriticalPath // To cell.port std::pair to; // Segment delay - DelayPair delay; + delay_t delay; }; // Clock pair ClockPair clock_pair; - // Total path delay - DelayPair delay; - // if delay.minDelay() < bound.minDelay() then this is a hold violation - // if delay.maxDelay() > bound.maxDelay() then this is a setup violation - DelayPair bound; + // if sum[segments.delay] < 0 this is a hold/min violation + // if sum[segments.delay] > max_delay this is a setup/max violation + delay_t max_delay; // Individual path segments std::vector segments; @@ -402,14 +489,11 @@ struct TimingResult // Detailed net timing data dict> detailed_net_timings; - // clock to clock delays - dict, delay_t> clock_delays; - // Histogram of slack dict slack_histogram; - // TODO: Hold time violations - // dict hold_violations; + // Min delay violations, only hold time for now + std::vector min_delay_violations; }; // Represents the contents of a non-leaf cell in a design diff --git a/common/kernel/report.cc b/common/kernel/report.cc index 917740b96c..d8c78881e0 100644 --- a/common/kernel/report.cc +++ b/common/kernel/report.cc @@ -73,22 +73,14 @@ static Json::array json_report_critical_paths(const Context *ctx) {"port", segment.to.second.c_str(ctx)}, {"loc", Json::array({toLoc.x, toLoc.y})}}); - auto minDelay = ctx->getDelayNS(segment.delay.minDelay()); - auto maxDelay = ctx->getDelayNS(segment.delay.maxDelay()); - - auto segmentJson = - Json::object({{"delay", Json::array({minDelay, maxDelay})}, {"from", fromJson}, {"to", toJson}}); - - if (segment.type == CriticalPath::Segment::Type::CLK_TO_Q) { - segmentJson["type"] = "clk-to-q"; - } else if (segment.type == CriticalPath::Segment::Type::SOURCE) { - segmentJson["type"] = "source"; - } else if (segment.type == CriticalPath::Segment::Type::LOGIC) { - segmentJson["type"] = "logic"; - } else if (segment.type == CriticalPath::Segment::Type::SETUP) { - segmentJson["type"] = "setup"; - } else if (segment.type == CriticalPath::Segment::Type::ROUTING) { - segmentJson["type"] = "routing"; + auto segmentJson = Json::object({ + {"delay", ctx->getDelayNS(segment.delay)}, + {"from", fromJson}, + {"to", toJson}, + }); + + segmentJson["type"] = CriticalPath::Segment::type_to_str(segment.type); + if (segment.type == CriticalPath::Segment::Type::ROUTING) { segmentJson["net"] = segment.net.c_str(ctx); } @@ -194,10 +186,7 @@ Report JSON structure: }, "type": , "net": , - "delay": [ - , - , - ], + "delay": , } ... ] diff --git a/common/kernel/timing.cc b/common/kernel/timing.cc index 105d20f906..880cc50691 100644 --- a/common/kernel/timing.cc +++ b/common/kernel/timing.cc @@ -25,7 +25,6 @@ #include #include #include -#include "log.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -173,6 +172,7 @@ void TimingAnalyser::get_route_delays() NetInfo *ni = net.second.get(); if (ni->driver.cell == nullptr || ni->driver.cell->bel == BelId()) continue; + for (auto &usr : ni->users) { if (usr.cell->bel == BelId()) continue; @@ -566,12 +566,15 @@ void TimingAnalyser::walk_forward() auto &pd = ports.at(sp.first); DelayPair init_arrival(0); CellPortKey clock_key; - // TODO: clock routing delay, if analysis of that is enabled if (sp.second != IdString()) { // clocked startpoints have a clock-to-out time for (auto &fanin : pd.cell_arcs) { if (fanin.type == CellArc::CLK_TO_Q && fanin.other_port == sp.second) { - init_arrival = init_arrival + fanin.value.delayPair(); + init_arrival += fanin.value.delayPair(); + // Include the clock delay if clock_skew analysis is enabled + if (with_clock_skew) { + init_arrival += ports.at(CellPortKey(sp.first.cell, fanin.other_port)).route_delay; + } break; } } @@ -591,16 +594,18 @@ void TimingAnalyser::walk_forward() for (auto &usr : net->users) { CellPortKey usr_key(usr); auto &usr_pd = ports.at(usr_key); - set_arrival_time(usr_key, arr.first, arr.second.value + usr_pd.route_delay, - arr.second.path_length, p); + auto next_arr = arr.second.value + usr_pd.route_delay; + set_arrival_time(usr_key, arr.first, next_arr, arr.second.path_length, p); } } else if (pd.type == PORT_IN) { // Input port; propagate delay through cell, adding combinational delay for (auto &fanout : pd.cell_arcs) { if (fanout.type != CellArc::COMBINATIONAL) continue; - set_arrival_time(CellPortKey(p.cell, fanout.other_port), arr.first, - arr.second.value + fanout.value.delayPair(), arr.second.path_length + 1, p); + + auto next_arr = arr.second.value + fanout.value.delayPair(); + set_arrival_time(CellPortKey(p.cell, fanout.other_port), arr.first, next_arr, + arr.second.path_length + 1, p); } } } @@ -616,20 +621,25 @@ void TimingAnalyser::walk_backward() auto &dom = domains.at(dom_id); for (auto &ep : dom.endpoints) { auto &pd = ports.at(ep.first); - DelayPair init_setuphold(0); + DelayPair init_required(0); CellPortKey clock_key; // TODO: clock routing delay, if analysis of that is enabled if (ep.second != IdString()) { // Add setup/hold time, if this endpoint is clocked for (auto &fanin : pd.cell_arcs) { - if (fanin.type == CellArc::SETUP && fanin.other_port == ep.second) - init_setuphold.min_delay -= fanin.value.maxDelay(); + + if (fanin.type == CellArc::SETUP && fanin.other_port == ep.second) { + if (with_clock_skew) { + init_required += ports.at(CellPortKey(ep.first.cell, fanin.other_port)).route_delay; + } + init_required.min_delay -= fanin.value.maxDelay(); + } if (fanin.type == CellArc::HOLD && fanin.other_port == ep.second) - init_setuphold.max_delay -= fanin.value.maxDelay(); + init_required.max_delay += fanin.value.maxDelay(); } clock_key = CellPortKey(ep.first.cell, ep.second); } - set_required_time(ep.first, dom_id, init_setuphold, 1, clock_key); + set_required_time(ep.first, dom_id, init_required, 1, clock_key); } } // Walk backwards in topological order @@ -661,18 +671,57 @@ dict TimingAnalyser::max_delay_by_domain_pairs() { dict domain_delay; - for (auto p : topological_order) { - auto &pd = ports.at(p); - for (auto &req : pd.required) { - auto &capture = req.first; - for (auto &arr : pd.arrival) { - auto &launch = arr.first; + for (domain_id_t capture_id = 0; capture_id < domain_id_t(domains.size()); ++capture_id) { + const auto &capture = domains.at(capture_id); + + for (auto &ep : capture.endpoints) { + auto &ep_port = ports.at(ep.first); + + auto &req = ep_port.required.at(capture_id); + + for (auto &[launch_id, arr] : ep_port.arrival) { + const auto &launch = domains.at(capture_id); + + auto dp = domain_pair_id(launch_id, capture_id); + + auto clocks = std::make_pair(launch.key.clock, capture.key.clock); + auto same_clock = capture_id == launch_id; + auto related_clocks = clock_delays.count(clocks) > 0; + delay_t clock_to_clock = 0; + if (related_clocks) { + clock_to_clock = clock_delays.at(clocks); + } - auto dp = domain_pair_id(launch, capture); + auto delay = arr.value.maxDelay() - req.value.minDelay() + clock_to_clock; + + // If domains are unrelated or not the same clock we need to make sure + // to remove the clock delays from the arrival and required times + // because the delays have no common reference. + if (with_clock_skew && !same_clock && !related_clocks) { + for (auto &fanin : ep_port.cell_arcs) { + if (fanin.type == CellArc::SETUP) { + auto clock_delay = ports.at(CellPortKey(ep.first.cell, fanin.other_port)).route_delay; + delay += clock_delay.minDelay(); + } + } + + // walk back to startpoint + auto crit_path = walk_crit_path(domain_pair_id(launch_id, capture_id), ep.first, true); + auto first_inp = crit_path.back(); + const auto &sp = first_inp.cell->ports.at(first_inp.port).net->driver; + auto &sp_port = ports.at(CellPortKey{sp.cell->name, sp.port}); + + for (auto &fanin : sp_port.cell_arcs) { + if (fanin.type == CellArc::CLK_TO_Q) { + auto clock_delay = ports.at(CellPortKey(sp.cell->name, fanin.other_port)).route_delay; + delay -= clock_delay.maxDelay(); + } + } + } - delay_t delay = arr.second.value.maxDelay() - req.second.value.minDelay(); - if (!domain_delay.count(dp) || domain_delay.at(dp) < delay) + if (!domain_delay.count(dp) || domain_delay.at(dp) < delay) { domain_delay[dp] = delay; + } } } } @@ -796,44 +845,19 @@ std::vector TimingAnalyser::get_worst_eps(domain_id_t domain_pair, return worst_eps; } -CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, CellPortKey endpoint) +std::vector TimingAnalyser::walk_crit_path(domain_id_t domain_pair, CellPortKey endpoint, bool longest_path) { - CriticalPath report; - - auto &dp = domain_pairs.at(domain_pair); - auto &launch = domains.at(dp.key.launch).key; - auto &capture = domains.at(dp.key.capture).key; - - report.delay = DelayPair(0); - - report.clock_pair.start.clock = launch.clock; - report.clock_pair.start.edge = launch.edge; - report.clock_pair.end.clock = capture.clock; - report.clock_pair.end.edge = capture.edge; - - report.bound = DelayPair(0, ctx->getDelayFromNS(1.0e9 / ctx->setting("target_freq"))); - if (launch.edge != capture.edge) { - report.bound.max_delay = report.bound.max_delay / 2; - } - - if (!launch.is_async() && ctx->nets.at(launch.clock)->clkconstr) { - if (launch.edge == capture.edge) { - report.bound.max_delay = ctx->nets.at(launch.clock)->clkconstr->period.minDelay(); - } else if (capture.edge == RISING_EDGE) { - report.bound.max_delay = ctx->nets.at(launch.clock)->clkconstr->low.minDelay(); - } else if (capture.edge == FALLING_EDGE) { - report.bound.max_delay = ctx->nets.at(launch.clock)->clkconstr->high.minDelay(); - } - } + const auto &dp = domain_pairs.at(domain_pair); + // Walk the min or max path backwards to find a single crit path pool> visited; std::vector crit_path_rev; auto cursor = endpoint; - while (cursor != CellPortKey()) { + bool is_startpoint = false; + do { auto cell = cell_info(cursor); auto &port = port_info(cursor); - int port_clocks; auto portClass = ctx->getPortTimingClass(cell, port.name, port_clocks); @@ -841,40 +865,153 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, if (!visited.insert(std::make_pair(cell->name, port.name)).second) break; - if (portClass != TMG_CLOCK_INPUT && portClass != TMG_IGNORE && port.type == PortType::PORT_IN) + // We store the reversed critical path as all input ports that lead to + // the timing startpoint. + auto is_input = portClass != TMG_CLOCK_INPUT && portClass != TMG_IGNORE && port.type == PortType::PORT_IN; + + if (is_input) crit_path_rev.emplace_back(PortRef{cell, port.name}); if (!ports.at(cursor).arrival.count(dp.key.launch)) break; - cursor = ports.at(cursor).arrival.at(dp.key.launch).bwd_max; + if (longest_path) { + cursor = ports.at(cursor).arrival.at(dp.key.launch).bwd_max; + } else { + cursor = ports.at(cursor).arrival.at(dp.key.launch).bwd_min; + } + is_startpoint = portClass == TMG_REGISTER_OUTPUT || portClass == TMG_STARTPOINT; + } while (!is_startpoint); + + return crit_path_rev; +} + +CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, CellPortKey endpoint, + bool longest_path) +{ + CriticalPath report; + + const auto &dp = domain_pairs.at(domain_pair); + const auto &launch = domains.at(dp.key.launch).key; + const auto &capture = domains.at(dp.key.capture).key; + + report.clock_pair.start.clock = launch.clock; + report.clock_pair.start.edge = launch.edge; + report.clock_pair.end.clock = capture.clock; + report.clock_pair.end.edge = capture.edge; + + report.max_delay = ctx->getDelayFromNS(1.0e9 / ctx->setting("target_freq")); + if (launch.edge != capture.edge) { + report.max_delay = report.max_delay / 2; + } + + if (!launch.is_async() && ctx->nets.at(launch.clock)->clkconstr) { + if (launch.edge == capture.edge) { + report.max_delay = ctx->nets.at(launch.clock)->clkconstr->period.minDelay(); + } else if (capture.edge == RISING_EDGE) { + report.max_delay = ctx->nets.at(launch.clock)->clkconstr->low.minDelay(); + } else if (capture.edge == FALLING_EDGE) { + report.max_delay = ctx->nets.at(launch.clock)->clkconstr->high.minDelay(); + } } + auto crit_path_rev = walk_crit_path(domain_pair, endpoint, longest_path); auto crit_path = boost::adaptors::reverse(crit_path_rev); - auto &front = crit_path.front(); - auto &front_port = front.cell->ports.at(front.port); - auto &front_driver = front_port.net->driver; + // Get timing and clocking info on the startpoint + auto first_inp = crit_path.front(); + const auto &sp = first_inp.cell->ports.at(first_inp.port).net->driver; + const auto &sp_cell = sp.cell; + const auto &sp_port = sp_cell->ports.at(sp.port); + int sp_clocks; + const auto sp_portClass = ctx->getPortTimingClass(sp_cell, sp_port.name, sp_clocks); + TimingClockingInfo sp_clk_info; + const NetInfo *sp_clk_net = nullptr; + bool register_start = sp_portClass == TMG_REGISTER_OUTPUT; + + if (register_start) { + // If we don't find a clock we don't consider this startpoint to be registered. + register_start = sp_clocks > 0; + for (int i = 0; i < sp_clocks; i++) { + sp_clk_info = ctx->getPortClockingInfo(sp_cell, sp_port.name, i); + const auto clk_net = sp_cell->getPort(sp_clk_info.clock_port); + register_start = clk_net != nullptr && clk_net->name == launch.clock && sp_clk_info.edge == launch.edge; + if (register_start) { + sp_clk_net = clk_net; + break; + } + } + } - int port_clocks; - auto portClass = ctx->getPortTimingClass(front_driver.cell, front_driver.port, port_clocks); + // Get timing and clocking info on the endpoint + const auto &ep = crit_path.back(); + const auto &ep_cell = ep.cell; + const auto &ep_port = ep_cell->ports.at(ep.port); + int ep_clocks; + const auto ep_portClass = ctx->getPortTimingClass(ep_cell, ep_port.name, ep_clocks); + TimingClockingInfo ep_clk_info; + const NetInfo *ep_clk_net = nullptr; + + bool register_end = ep_portClass == TMG_REGISTER_INPUT; + + if (register_end) { + // If we don't find a clock we don't consider this startpoint to be registered. + register_end = ep_clocks > 0; + for (int i = 0; i < ep_clocks; i++) { + ep_clk_info = ctx->getPortClockingInfo(ep_cell, ep_port.name, i); + const auto clk_net = ep_cell->getPort(ep_clk_info.clock_port); + + register_end = clk_net != nullptr && clk_net->name == capture.clock && ep_clk_info.edge == capture.edge; + if (register_end) { + ep_clk_net = clk_net; + break; + } + } + } - const CellInfo *last_cell = front.cell; - IdString last_port = front_driver.port; + auto clock_pair = std::make_pair(launch.clock, capture.clock); + auto related_clock = clock_delays.count(clock_pair) > 0; + auto same_clock = launch.clock == capture.clock; + + if (related_clock) { + delay_t clock_delay = clock_delays.at(clock_pair); + if (!is_zero_delay(clock_delay)) { + CriticalPath::Segment seg_c2c; + seg_c2c.type = CriticalPath::Segment::Type::CLK_TO_CLK; + seg_c2c.delay = clock_delay; + seg_c2c.from = std::make_pair(sp_cell->name, sp_clk_info.clock_port); + seg_c2c.to = std::make_pair(ep_cell->name, ep_clk_info.clock_port); + seg_c2c.net = IdString(); + report.segments.push_back(seg_c2c); + } + } - int clock_start = -1; - if (portClass == TMG_REGISTER_OUTPUT) { - for (int i = 0; i < port_clocks; i++) { - TimingClockingInfo clockInfo = ctx->getPortClockingInfo(front_driver.cell, front_driver.port, i); - const NetInfo *clknet = front_driver.cell->getPort(clockInfo.clock_port); - if (clknet != nullptr && clknet->name == launch.clock && clockInfo.edge == launch.edge) { - last_port = clockInfo.clock_port; - clock_start = i; - break; + if (with_clock_skew && register_start && register_end && (same_clock || related_clock)) { + + auto clock_delay_launch = ctx->getNetinfoRouteDelay(sp_clk_net, PortRef{sp_cell, sp_clk_info.clock_port}); + auto clock_delay_capture = ctx->getNetinfoRouteDelay(ep_clk_net, PortRef{ep_cell, ep_clk_info.clock_port}); + + delay_t clock_skew = clock_delay_launch - clock_delay_capture; + + if (!is_zero_delay(clock_skew)) { + CriticalPath::Segment seg_skew; + seg_skew.type = CriticalPath::Segment::Type::CLK_SKEW; + seg_skew.delay = clock_skew; + seg_skew.from = std::make_pair(sp_cell->name, sp_clk_info.clock_port); + seg_skew.to = std::make_pair(ep_cell->name, ep_clk_info.clock_port); + if (same_clock) { + seg_skew.net = launch.clock; + } else { + seg_skew.net = IdString(); } + report.segments.push_back(seg_skew); } } + const CellInfo *prev_cell = sp_cell; + IdString prev_port = sp_port.name; + + bool is_startpoint = true; for (auto sink : crit_path) { auto sink_cell = sink.cell; auto &port = sink_cell->ports.at(sink.port); @@ -885,22 +1022,19 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, CriticalPath::Segment seg_logic; DelayQuad comb_delay; - if (clock_start != -1) { - auto clockInfo = ctx->getPortClockingInfo(driver_cell, driver.port, clock_start); - comb_delay = clockInfo.clockToQ; - clock_start = -1; + if (is_startpoint && register_start) { + comb_delay = sp_clk_info.clockToQ; seg_logic.type = CriticalPath::Segment::Type::CLK_TO_Q; - } else if (last_port == driver.port) { - // Case where we start with a STARTPOINT etc + } else if (is_startpoint) { comb_delay = DelayQuad(0); seg_logic.type = CriticalPath::Segment::Type::SOURCE; } else { - ctx->getCellDelay(driver_cell, last_port, driver.port, comb_delay); + ctx->getCellDelay(driver_cell, prev_port, driver.port, comb_delay); seg_logic.type = CriticalPath::Segment::Type::LOGIC; } - seg_logic.delay = comb_delay.delayPair(); - seg_logic.from = std::make_pair(last_cell->name, last_port); + seg_logic.delay = longest_path ? comb_delay.maxDelay() : comb_delay.minDelay(); + seg_logic.from = std::make_pair(prev_cell->name, prev_port); seg_logic.to = std::make_pair(driver_cell->name, driver.port); seg_logic.net = IdString(); report.segments.push_back(seg_logic); @@ -909,26 +1043,28 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, CriticalPath::Segment seg_route; seg_route.type = CriticalPath::Segment::Type::ROUTING; - seg_route.delay = net_delay; + seg_route.delay = longest_path ? net_delay.maxDelay() : net_delay.minDelay(); seg_route.from = std::make_pair(driver_cell->name, driver.port); seg_route.to = std::make_pair(sink_cell->name, sink.port); seg_route.net = net->name; report.segments.push_back(seg_route); - last_cell = sink_cell; - last_port = sink.port; + prev_cell = sink_cell; + prev_port = sink.port; + is_startpoint = false; } - int clockCount = 0; - auto sinkClass = ctx->getPortTimingClass(crit_path.back().cell, crit_path.back().port, clockCount); - if (sinkClass == TMG_REGISTER_INPUT && clockCount > 0) { - auto sinkClockInfo = ctx->getPortClockingInfo(crit_path.back().cell, crit_path.back().port, 0); - auto setup = sinkClockInfo.setup; - + if (register_end) { CriticalPath::Segment seg_logic; - seg_logic.type = CriticalPath::Segment::Type::SETUP; - seg_logic.delay = setup; - seg_logic.from = std::make_pair(last_cell->name, last_port); + seg_logic.delay = 0; + if (longest_path) { + seg_logic.type = CriticalPath::Segment::Type::SETUP; + seg_logic.delay += ep_clk_info.setup.maxDelay(); + } else { + seg_logic.type = CriticalPath::Segment::Type::HOLD; + seg_logic.delay -= ep_clk_info.hold.maxDelay(); + } + seg_logic.from = std::make_pair(prev_cell->name, prev_port); seg_logic.to = seg_logic.from; seg_logic.net = IdString(); report.segments.push_back(seg_logic); @@ -943,7 +1079,10 @@ void TimingAnalyser::build_crit_path_reports() auto &xclock_reports = result.xclock_paths; auto &clock_fmax = result.clock_fmax; auto &empty_clocks = result.empty_paths; - auto &clock_delays_ctx = result.clock_delays; + + if (!setup_only) { + result.min_delay_violations = get_min_delay_violations(); + } auto delay_by_domain = max_delay_by_domain_pairs(); @@ -980,7 +1119,7 @@ void TimingAnalyser::build_crit_path_reports() clock_fmax[launch.clock].achieved = Fmax; clock_fmax[launch.clock].constraint = target; - clock_reports[launch.clock] = build_critical_path_report(i, worst_endpoint.at(0)); + clock_reports[launch.clock] = build_critical_path_report(i, worst_endpoint.at(0), true); empty_clocks.erase(launch.clock); } @@ -998,7 +1137,7 @@ void TimingAnalyser::build_crit_path_reports() if (worst_endpoint.empty()) continue; - xclock_reports.emplace_back(build_critical_path_report(i, worst_endpoint.at(0))); + xclock_reports.emplace_back(build_critical_path_report(i, worst_endpoint.at(0), true)); } auto cmp_crit_path = [&](const CriticalPath &ra, const CriticalPath &rb) { @@ -1023,8 +1162,6 @@ void TimingAnalyser::build_crit_path_reports() }; std::sort(xclock_reports.begin(), xclock_reports.end(), cmp_crit_path); - - clock_delays_ctx = clock_delays; } void TimingAnalyser::build_slack_histogram_report() @@ -1061,6 +1198,76 @@ void TimingAnalyser::build_slack_histogram_report() } } +std::vector TimingAnalyser::get_min_delay_violations() +{ + std::vector violations; + + for (domain_id_t capture_id = 0; capture_id < domain_id_t(domains.size()); ++capture_id) { + const auto &capture = domains.at(capture_id); + const auto &capture_clock = capture.key.clock; + + for (const auto &ep : capture.endpoints) { + const CellInfo *ci = cell_info(ep.first); + int clkInfoCount = 0; + const TimingPortClass cls = ctx->getPortTimingClass(ci, ep.first.port, clkInfoCount); + if (cls != TMG_REGISTER_INPUT) + continue; + + const auto &port = ports.at(ep.first); + + const auto &req = port.required.at(capture_id); + + for (auto &[launch_id, arr] : port.arrival) { + const auto &launch = domains.at(launch_id); + const auto &launch_clock = launch.key.clock; + const auto dom_pair_id = domain_pair_id(launch_id, capture_id); + + auto clocks = std::make_pair(launch_clock, capture_clock); + auto related_clocks = clock_delays.count(clocks) > 0; + + if (launch_id == async_clock_id || (launch_id != capture_id && !related_clocks)) { + continue; + } + + delay_t clock_to_clock = 0; + if (related_clocks) { + clock_to_clock = clock_delays.at(clocks); + } + + auto hold_slack = arr.value.minDelay() - req.value.maxDelay() + clock_to_clock; + + if (hold_slack <= 0) { + auto report = build_critical_path_report(dom_pair_id, ep.first, false); + violations.emplace_back(report); + } + } + } + } + + std::vector> sum_indices; + sum_indices.reserve(violations.size()); + + for (size_t i = 0; i < violations.size(); ++i) { + delay_t delay = 0; + for (const auto &seg : violations[i].segments) { + delay += seg.delay; + } + + sum_indices.emplace_back(i, delay); + } + + std::sort(sum_indices.begin(), sum_indices.end(), + [](auto &left, auto &right) { return left.second < right.second; }); + + std::vector sorted_violations; + sorted_violations.reserve(violations.size()); + for (const auto &pair : sum_indices) { + sorted_violations.push_back(std::move(violations[pair.first])); + } + + return sorted_violations; +} + domain_id_t TimingAnalyser::domain_id(IdString cell, IdString clock_port, ClockEdge edge) { return domain_id(ctx->cells.at(cell)->ports.at(clock_port).net, edge); @@ -1095,6 +1302,26 @@ void TimingAnalyser::copy_domains(const CellPortKey &from, const CellPortKey &to } } +const std::string TimingAnalyser::arcType_to_str(CellArc::ArcType typ) +{ + switch (typ) { + case TimingAnalyser::CellArc::COMBINATIONAL: + return "COMBINATIONAL"; + case TimingAnalyser::CellArc::SETUP: + return "SETUP"; + case TimingAnalyser::CellArc::HOLD: + return "HOLD"; + case TimingAnalyser::CellArc::CLK_TO_Q: + return "CLK_TO_Q"; + case TimingAnalyser::CellArc::STARTPOINT: + return "STARTPOINT"; + case TimingAnalyser::CellArc::ENDPOINT: + return "ENDPOINT"; + default: + NPNR_ASSERT_FALSE("Impossible CellArc::ArcType\n"); + } +} + CellInfo *TimingAnalyser::cell_info(const CellPortKey &key) { return ctx->cells.at(key.cell).get(); } PortInfo &TimingAnalyser::port_info(const CellPortKey &key) { return ctx->cells.at(key.cell)->ports.at(key.port); } @@ -1103,6 +1330,8 @@ void timing_analysis(Context *ctx, bool print_slack_histogram, bool print_fmax, bool update_results) { TimingAnalyser tmg(ctx); + tmg.setup_only = false; + tmg.with_clock_skew = true; tmg.setup(ctx->detailed_timing_report, print_slack_histogram, print_path || print_fmax); auto &result = tmg.get_timing_result(); diff --git a/common/kernel/timing.h b/common/kernel/timing.h index dae3ba27e1..f82a1b6da6 100644 --- a/common/kernel/timing.h +++ b/common/kernel/timing.h @@ -27,8 +27,8 @@ NEXTPNR_NAMESPACE_BEGIN struct CellPortKey { - CellPortKey() {}; - CellPortKey(IdString cell, IdString port) : cell(cell), port(port) {}; + CellPortKey(){}; + CellPortKey(IdString cell, IdString port) : cell(cell), port(port){}; explicit CellPortKey(const PortRef &pr) { NPNR_ASSERT(pr.cell != nullptr); @@ -49,7 +49,7 @@ struct ClockDomainKey { IdString clock; ClockEdge edge; - ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge) {}; + ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge){}; // probably also need something here to deal with constraints inline bool is_async() const { return clock == IdString(); } @@ -63,7 +63,7 @@ typedef int domain_id_t; struct ClockDomainPairKey { domain_id_t launch, capture; - ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture) {}; + ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture){}; inline bool operator==(const ClockDomainPairKey &other) const { return (launch == other.launch) && (capture == other.capture); @@ -98,6 +98,9 @@ struct TimingAnalyser TimingResult &get_timing_result() { return result; } + // Enable analysis of clock skew between FFs. + bool with_clock_skew = false; + bool setup_only = false; bool have_loops = false; bool updated_domains = false; @@ -118,11 +121,19 @@ struct TimingAnalyser void compute_slack(); void compute_criticality(); + // Walk the endpoint back to a startpoint and get back the input ports walked + // and the startpoint. + std::vector walk_crit_path(domain_id_t domain_pair, CellPortKey endpoint, bool longest_path); + void build_detailed_net_timing_report(); - CriticalPath build_critical_path_report(domain_id_t domain_pair, CellPortKey endpoint); + // longest_path indicate whether to follow the longest or shortest path from endpoint to startpoint + // longest paths are interesting for setup violations and shortest paths are interesting for hold violations + CriticalPath build_critical_path_report(domain_id_t domain_pair, CellPortKey endpoint, bool longest_path); void build_crit_path_reports(); void build_slack_histogram_report(); + std::vector get_min_delay_violations(); + dict max_delay_by_domain_pairs(); // get the N worst endpoints for a given domain pair @@ -172,9 +183,9 @@ struct TimingAnalyser ClockEdge edge; CellArc(ArcType type, IdString other_port, DelayQuad value) - : type(type), other_port(other_port), value(value), edge(RISING_EDGE) {}; + : type(type), other_port(other_port), value(value), edge(RISING_EDGE){}; CellArc(ArcType type, IdString other_port, DelayQuad value, ClockEdge edge) - : type(type), other_port(other_port), value(value), edge(edge) {}; + : type(type), other_port(other_port), value(value), edge(edge){}; }; // Timing data for every cell port @@ -198,7 +209,7 @@ struct TimingAnalyser struct PerDomain { - PerDomain(ClockDomainKey key) : key(key) {}; + PerDomain(ClockDomainKey key) : key(key){}; ClockDomainKey key; // these are pairs (signal port; clock port) std::vector> startpoints, endpoints; @@ -206,7 +217,7 @@ struct TimingAnalyser struct PerDomainPair { - PerDomainPair(ClockDomainPairKey key) : key(key) {}; + PerDomainPair(ClockDomainPairKey key) : key(key){}; ClockDomainPairKey key; DelayPair period{0}; delay_t worst_setup_slack, worst_hold_slack; @@ -221,6 +232,8 @@ struct TimingAnalyser void copy_domains(const CellPortKey &from, const CellPortKey &to, bool backwards); + [[maybe_unused]] static const std::string arcType_to_str(CellArc::ArcType typ); + dict ports; dict domain_to_id; dict pair_to_id; diff --git a/common/kernel/timing_log.cc b/common/kernel/timing_log.cc index 24aac66589..d0dd8986ba 100644 --- a/common/kernel/timing_log.cc +++ b/common/kernel/timing_log.cc @@ -19,6 +19,7 @@ * */ +#include #include "log.h" #include "nextpnr.h" #include "util.h" @@ -60,44 +61,37 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) source_entries.emplace_back(sourcelist.substr(prev, current - prev)); // Iterate and print our source list at the correct indentation level - log_info(" Defined in:\n"); + log_info(" Defined in:\n"); for (auto entry : source_entries) { - log_info(" %s\n", entry.c_str()); + log_info(" %s\n", entry.c_str()); } }; // A helper function for reporting one critical path auto print_path_report = [ctx](const CriticalPath &path) { - DelayPair total(0), logic_total(0), route_total(0); - - // We print out the max delay since that's usually the interesting case - // But if we know this critical path has violated hold time we print the - // min delay instead - bool hold_violation = path.delay.minDelay() < path.bound.minDelay(); - auto get_delay_ns = [hold_violation, ctx](const DelayPair &d) { - if (hold_violation) { - ctx->getDelayNS(d.minDelay()); - } - return ctx->getDelayNS(d.maxDelay()); - }; + delay_t total(0), logic_total(0), route_total(0); - log_info("curr total\n"); + log_info(" type curr total name\n"); for (const auto &segment : path.segments) { - total += segment.delay; + delay_t delay = segment.delay; + + total += delay; if (segment.type == CriticalPath::Segment::Type::CLK_TO_Q || segment.type == CriticalPath::Segment::Type::SOURCE || segment.type == CriticalPath::Segment::Type::LOGIC || - segment.type == CriticalPath::Segment::Type::SETUP) { - logic_total += segment.delay; - - const std::string type_name = (segment.type == CriticalPath::Segment::Type::SETUP) ? "Setup" : "Source"; - - log_info("%4.1f %4.1f %s %s.%s\n", get_delay_ns(segment.delay), get_delay_ns(total), type_name.c_str(), - segment.to.first.c_str(ctx), segment.to.second.c_str(ctx)); - } else if (segment.type == CriticalPath::Segment::Type::ROUTING) { - route_total = route_total + segment.delay; + segment.type == CriticalPath::Segment::Type::SETUP || + segment.type == CriticalPath::Segment::Type::HOLD) { + logic_total += delay; + + log_info("%10s % 5.2f % 5.2f Source %s.%s\n", CriticalPath::Segment::type_to_str(segment.type).c_str(), + ctx->getDelayNS(delay), ctx->getDelayNS(total), segment.to.first.c_str(ctx), + segment.to.second.c_str(ctx)); + } else if (segment.type == CriticalPath::Segment::Type::ROUTING || + segment.type == CriticalPath::Segment::Type::CLK_TO_CLK || + segment.type == CriticalPath::Segment::Type::CLK_SKEW) { + route_total = route_total + delay; const auto &driver = ctx->cells.at(segment.from.first); const auto &sink = ctx->cells.at(segment.to.first); @@ -105,9 +99,12 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) auto driver_loc = ctx->getBelLocation(driver->bel); auto sink_loc = ctx->getBelLocation(sink->bel); - log_info("%4.1f %4.1f Net %s (%d,%d) -> (%d,%d)\n", get_delay_ns(segment.delay), get_delay_ns(total), - segment.net.c_str(ctx), driver_loc.x, driver_loc.y, sink_loc.x, sink_loc.y); - log_info(" Sink %s.%s\n", segment.to.first.c_str(ctx), segment.to.second.c_str(ctx)); + log_info("%10s % 5.2f % 5.2f Net %s (%d,%d) -> (%d,%d)\n", + CriticalPath::Segment::type_to_str(segment.type).c_str(), ctx->getDelayNS(delay), + ctx->getDelayNS(total), segment.net.c_str(ctx), driver_loc.x, driver_loc.y, sink_loc.x, + sink_loc.y); + log_info(" Sink %s.%s\n", segment.to.first.c_str(ctx), + segment.to.second.c_str(ctx)); const NetInfo *net = ctx->nets.at(segment.net).get(); @@ -119,7 +116,7 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) auto driver_wire = ctx->getNetinfoSourceWire(net); auto sink_wire = ctx->getNetinfoSinkWire(net, sink_ref, 0); - log_info(" prediction: %f ns estimate: %f ns\n", + log_info(" prediction: %f ns estimate: %f ns\n", ctx->getDelayNS(ctx->predictArcDelay(net, sink_ref)), ctx->getDelayNS(ctx->estimateDelay(driver_wire, sink_wire))); auto cursor = sink_wire; @@ -144,7 +141,7 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) } } } - log_info("%.1f ns logic, %.1f ns routing\n", get_delay_ns(logic_total), get_delay_ns(route_total)); + log_info("%.2f ns logic, %.2f ns routing\n", ctx->getDelayNS(logic_total), ctx->getDelayNS(route_total)); }; // Single domain paths @@ -168,12 +165,44 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) log_info("Critical path report for cross-domain path '%s' -> '%s':\n", start.c_str(), end.c_str()); print_path_report(report); } -}; + + // Min delay violated paths + // Show maximum of 10 + auto num_min_violations = result.min_delay_violations.size(); + bool allow_fail = bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false); + if (num_min_violations > 0) { + log_break(); + log_info("%zu Hold/min time violations (showing 10 worst paths):\n", num_min_violations); + for (size_t i = 0; i < std::min((size_t)10, num_min_violations); ++i) { + auto &report = result.min_delay_violations.at(i); + log_break(); + std::string start = clock_event_name(ctx, report.clock_pair.start); + std::string end = clock_event_name(ctx, report.clock_pair.end); + + std::string message; + if (report.clock_pair.start == report.clock_pair.end) { + message = "Hold/min time violation for clock '" + start + "':\n"; + } else { + message = "Hold/min time violation for path '" + start + "' -> '" + end + "':\n"; + } + + if (allow_fail) { + log_warning("%s", message.c_str()); + } else { + log_nonfatal_error("%s", message.c_str()); + } + + print_path_report(report); + } + } +} static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) { log_break(); + bool allow_fail = bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false); + if (result.clock_paths.empty() && result.clock_paths.empty()) { log_info("No Fmax available; no interior timing paths found in design.\n"); return; @@ -194,7 +223,7 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) if (!warn_on_failure || passed) log_info("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", clock_name.c_str(), fmax, passed ? "PASS" : "FAIL", target); - else if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false)) + else if (allow_fail) log_warning("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", clock_name.c_str(), fmax, passed ? "PASS" : "FAIL", target); else @@ -206,12 +235,20 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) // Clock to clock delays for xpaths dict xclock_delays; for (auto &report : result.xclock_paths) { - const auto &clock1_name = report.clock_pair.start.clock; - const auto &clock2_name = report.clock_pair.end.clock; + // Check if this path has a clock-2-clock delay + // clock-2-clock delays are always the first segment in the path + // But we walk the entire path anyway. + bool has_clock_to_clock = false; + delay_t clock_delay = 0; + for (const auto &seg : report.segments) { + if (seg.type == CriticalPath::Segment::Type::CLK_TO_CLK) { + has_clock_to_clock = true; + clock_delay += seg.delay; + } + } - const auto key = std::make_pair(clock1_name, clock2_name); - if (result.clock_delays.count(key)) { - xclock_delays[report.clock_pair] = result.clock_delays.at(key); + if (has_clock_to_clock) { + xclock_delays[report.clock_pair] = clock_delay; } } @@ -228,12 +265,11 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) const auto &clock_a = report.clock_pair.start.clock; const auto &clock_b = report.clock_pair.end.clock; - const auto key = std::make_pair(clock_a, clock_b); - if (!result.clock_delays.count(key)) { + if (!xclock_delays.count(report.clock_pair)) { continue; } - DelayPair path_delay(0); + delay_t path_delay = 0; for (const auto &segment : report.segments) { path_delay += segment.delay; } @@ -241,14 +277,13 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) // Compensate path delay for clock-to-clock delay. If the // result is negative then only the latter matters. Otherwise // the compensated path delay is taken. - auto clock_delay = result.clock_delays.at(key); - path_delay -= DelayPair(clock_delay); + auto clock_delay = xclock_delays.at(report.clock_pair); float fmax = std::numeric_limits::infinity(); - if (path_delay.maxDelay() < 0) { + if (path_delay < 0) { fmax = 1e3f / ctx->getDelayNS(clock_delay); - } else if (path_delay.maxDelay() > 0) { - fmax = 1e3f / ctx->getDelayNS(path_delay.maxDelay()); + } else if (path_delay > 0) { + fmax = 1e3f / ctx->getDelayNS(path_delay); } // Both clocks are related so they should have the same @@ -273,8 +308,7 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) if (!warn_on_failure || passed) log_info("Max frequency for %s -> %s: %.02f MHz (%s at %.02f MHz)\n", ev_a.c_str(), ev_b.c_str(), fmax, passed ? "PASS" : "FAIL", target); - else if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false) || - bool_or_default(ctx->settings, ctx->id("timing/ignoreRelClk"), false)) + else if (allow_fail || bool_or_default(ctx->settings, ctx->id("timing/ignoreRelClk"), false)) log_warning("Max frequency for %s -> %s: %.02f MHz (%s at %.02f MHz)\n", ev_a.c_str(), ev_b.c_str(), fmax, passed ? "PASS" : "FAIL", target); else @@ -285,7 +319,7 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) } // Report clock delays for xpaths - if (!result.clock_delays.empty()) { + if (!xclock_delays.empty()) { for (auto &pair : xclock_delays) { auto ev_a = clock_event_name(ctx, pair.first.start, max_width_xca); auto ev_b = clock_event_name(ctx, pair.first.end, max_width_xcb); @@ -316,12 +350,12 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) for (auto &report : result.xclock_paths) { const ClockEvent &a = report.clock_pair.start; const ClockEvent &b = report.clock_pair.end; - DelayPair path_delay(0); + delay_t path_delay = 0; for (const auto &segment : report.segments) { path_delay += segment.delay; } auto ev_a = clock_event_name(ctx, a, start_field_width), ev_b = clock_event_name(ctx, b, end_field_width); - log_info("Max delay %s -> %s: %0.02f ns\n", ev_a.c_str(), ev_b.c_str(), ctx->getDelayNS(path_delay.maxDelay())); + log_info("Max delay %s -> %s: %0.02f ns\n", ev_a.c_str(), ev_b.c_str(), ctx->getDelayNS(path_delay)); } log_break(); } diff --git a/common/route/router1.cc b/common/route/router1.cc index a3ebdb60c1..510065f611 100644 --- a/common/route/router1.cc +++ b/common/route/router1.cc @@ -124,6 +124,8 @@ struct Router1 Router1(Context *ctx, const Router1Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) { timing_driven = ctx->setting("timing_driven"); + tmg.setup_only = false; + tmg.with_clock_skew = true; tmg.setup(); tmg.run(); } diff --git a/common/route/router2.cc b/common/route/router2.cc index 8dcec6e91c..53596c24af 100644 --- a/common/route/router2.cc +++ b/common/route/router2.cc @@ -102,7 +102,12 @@ struct Router2 Context *ctx; Router2Cfg cfg; - Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) { tmg.setup(); } + Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) + { + tmg.setup_only = false; + tmg.with_clock_skew = true; + tmg.setup(); + } // Use 'udata' for fast net lookups and indexing std::vector nets_by_udata;